| Line Number |
../DebugInfoTest/example_mips_dbg.ll
BUT NOT
../DebugInfoTest/example_mips.ll
|
Line Number |
../DebugInfoTest/example_mips.ll
BUT NOT
../DebugInfoTest/example_mips_dbg.ll
|
| 1 |
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// |
1 |
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// |
| 2 |
// |
2 |
// |
| 3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 |
// See https://llvm.org/LICENSE.txt for license information. |
4 |
// See https://llvm.org/LICENSE.txt for license information. |
| 5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 |
// |
6 |
// |
| 7 |
//===----------------------------------------------------------------------===// |
7 |
//===----------------------------------------------------------------------===// |
| 8 |
// |
8 |
// |
| 9 |
// This pass munges the code in the input function to better prepare it for |
9 |
// This pass munges the code in the input function to better prepare it for |
| 10 |
// SelectionDAG-based code generation. This works around limitations in its |
10 |
// SelectionDAG-based code generation. This works around limitations in its |
| 11 |
// basic-block-at-a-time approach. It should eventually be removed. |
11 |
// basic-block-at-a-time approach. It should eventually be removed. |
| 12 |
// |
12 |
// |
| 13 |
//===----------------------------------------------------------------------===// |
13 |
//===----------------------------------------------------------------------===// |
| 14 |
|
14 |
|
| 15 |
#include "llvm/ADT/APInt.h" |
15 |
#include "llvm/ADT/APInt.h" |
| 16 |
#include "llvm/ADT/ArrayRef.h" |
16 |
#include "llvm/ADT/ArrayRef.h" |
| 17 |
#include "llvm/ADT/DenseMap.h" |
17 |
#include "llvm/ADT/DenseMap.h" |
| 18 |
#include "llvm/ADT/MapVector.h" |
18 |
#include "llvm/ADT/MapVector.h" |
| 19 |
#include "llvm/ADT/PointerIntPair.h" |
19 |
#include "llvm/ADT/PointerIntPair.h" |
| 20 |
#include "llvm/ADT/STLExtras.h" |
20 |
#include "llvm/ADT/STLExtras.h" |
| 21 |
#include "llvm/ADT/SmallPtrSet.h" |
21 |
#include "llvm/ADT/SmallPtrSet.h" |
| 22 |
#include "llvm/ADT/SmallVector.h" |
22 |
#include "llvm/ADT/SmallVector.h" |
| 23 |
#include "llvm/ADT/Statistic.h" |
23 |
#include "llvm/ADT/Statistic.h" |
| 24 |
#include "llvm/Analysis/BlockFrequencyInfo.h" |
24 |
#include "llvm/Analysis/BlockFrequencyInfo.h" |
| 25 |
#include "llvm/Analysis/BranchProbabilityInfo.h" |
25 |
#include "llvm/Analysis/BranchProbabilityInfo.h" |
| 26 |
#include "llvm/Analysis/InstructionSimplify.h" |
26 |
#include "llvm/Analysis/InstructionSimplify.h" |
| 27 |
#include "llvm/Analysis/LoopInfo.h" |
27 |
#include "llvm/Analysis/LoopInfo.h" |
| 28 |
#include "llvm/Analysis/ProfileSummaryInfo.h" |
28 |
#include "llvm/Analysis/ProfileSummaryInfo.h" |
| 29 |
#include "llvm/Analysis/TargetLibraryInfo.h" |
29 |
#include "llvm/Analysis/TargetLibraryInfo.h" |
| 30 |
#include "llvm/Analysis/TargetTransformInfo.h" |
30 |
#include "llvm/Analysis/TargetTransformInfo.h" |
| 31 |
#include "llvm/Analysis/ValueTracking.h" |
31 |
#include "llvm/Analysis/ValueTracking.h" |
| 32 |
#include "llvm/Analysis/VectorUtils.h" |
32 |
#include "llvm/Analysis/VectorUtils.h" |
| 33 |
#include "llvm/CodeGen/Analysis.h" |
33 |
#include "llvm/CodeGen/Analysis.h" |
| 34 |
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" |
34 |
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" |
| 35 |
#include "llvm/CodeGen/ISDOpcodes.h" |
35 |
#include "llvm/CodeGen/ISDOpcodes.h" |
| 36 |
#include "llvm/CodeGen/MachineValueType.h" |
36 |
#include "llvm/CodeGen/MachineValueType.h" |
| 37 |
#include "llvm/CodeGen/SelectionDAGNodes.h" |
37 |
#include "llvm/CodeGen/SelectionDAGNodes.h" |
| 38 |
#include "llvm/CodeGen/TargetLowering.h" |
38 |
#include "llvm/CodeGen/TargetLowering.h" |
| 39 |
#include "llvm/CodeGen/TargetPassConfig.h" |
39 |
#include "llvm/CodeGen/TargetPassConfig.h" |
| 40 |
#include "llvm/CodeGen/TargetSubtargetInfo.h" |
40 |
#include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 41 |
#include "llvm/CodeGen/ValueTypes.h" |
41 |
#include "llvm/CodeGen/ValueTypes.h" |
| 42 |
#include "llvm/Config/llvm-config.h" |
42 |
#include "llvm/Config/llvm-config.h" |
| 43 |
#include "llvm/IR/Argument.h" |
43 |
#include "llvm/IR/Argument.h" |
| 44 |
#include "llvm/IR/Attributes.h" |
44 |
#include "llvm/IR/Attributes.h" |
| 45 |
#include "llvm/IR/BasicBlock.h" |
45 |
#include "llvm/IR/BasicBlock.h" |
| 46 |
#include "llvm/IR/Constant.h" |
46 |
#include "llvm/IR/Constant.h" |
| 47 |
#include "llvm/IR/Constants.h" |
47 |
#include "llvm/IR/Constants.h" |
| 48 |
#include "llvm/IR/DataLayout.h" |
48 |
#include "llvm/IR/DataLayout.h" |
| 49 |
#include "llvm/IR/DebugInfo.h" |
49 |
#include "llvm/IR/DebugInfo.h" |
| 50 |
#include "llvm/IR/DerivedTypes.h" |
50 |
#include "llvm/IR/DerivedTypes.h" |
| 51 |
#include "llvm/IR/Dominators.h" |
51 |
#include "llvm/IR/Dominators.h" |
| 52 |
#include "llvm/IR/Function.h" |
52 |
#include "llvm/IR/Function.h" |
| 53 |
#include "llvm/IR/GetElementPtrTypeIterator.h" |
53 |
#include "llvm/IR/GetElementPtrTypeIterator.h" |
| 54 |
#include "llvm/IR/GlobalValue.h" |
54 |
#include "llvm/IR/GlobalValue.h" |
| 55 |
#include "llvm/IR/GlobalVariable.h" |
55 |
#include "llvm/IR/GlobalVariable.h" |
| 56 |
#include "llvm/IR/IRBuilder.h" |
56 |
#include "llvm/IR/IRBuilder.h" |
| 57 |
#include "llvm/IR/InlineAsm.h" |
57 |
#include "llvm/IR/InlineAsm.h" |
| 58 |
#include "llvm/IR/InstrTypes.h" |
58 |
#include "llvm/IR/InstrTypes.h" |
| 59 |
#include "llvm/IR/Instruction.h" |
59 |
#include "llvm/IR/Instruction.h" |
| 60 |
#include "llvm/IR/Instructions.h" |
60 |
#include "llvm/IR/Instructions.h" |
| 61 |
#include "llvm/IR/IntrinsicInst.h" |
61 |
#include "llvm/IR/IntrinsicInst.h" |
| 62 |
#include "llvm/IR/Intrinsics.h" |
62 |
#include "llvm/IR/Intrinsics.h" |
| 63 |
#include "llvm/IR/IntrinsicsAArch64.h" |
63 |
#include "llvm/IR/IntrinsicsAArch64.h" |
| 64 |
#include "llvm/IR/LLVMContext.h" |
64 |
#include "llvm/IR/LLVMContext.h" |
| 65 |
#include "llvm/IR/MDBuilder.h" |
65 |
#include "llvm/IR/MDBuilder.h" |
| 66 |
#include "llvm/IR/Module.h" |
66 |
#include "llvm/IR/Module.h" |
| 67 |
#include "llvm/IR/Operator.h" |
67 |
#include "llvm/IR/Operator.h" |
| 68 |
#include "llvm/IR/PatternMatch.h" |
68 |
#include "llvm/IR/PatternMatch.h" |
| 69 |
#include "llvm/IR/ProfDataUtils.h" |
69 |
#include "llvm/IR/ProfDataUtils.h" |
| 70 |
#include "llvm/IR/Statepoint.h" |
70 |
#include "llvm/IR/Statepoint.h" |
| 71 |
#include "llvm/IR/Type.h" |
71 |
#include "llvm/IR/Type.h" |
| 72 |
#include "llvm/IR/Use.h" |
72 |
#include "llvm/IR/Use.h" |
| 73 |
#include "llvm/IR/User.h" |
73 |
#include "llvm/IR/User.h" |
| 74 |
#include "llvm/IR/Value.h" |
74 |
#include "llvm/IR/Value.h" |
| 75 |
#include "llvm/IR/ValueHandle.h" |
75 |
#include "llvm/IR/ValueHandle.h" |
| 76 |
#include "llvm/IR/ValueMap.h" |
76 |
#include "llvm/IR/ValueMap.h" |
| 77 |
#include "llvm/InitializePasses.h" |
77 |
#include "llvm/InitializePasses.h" |
| 78 |
#include "llvm/Pass.h" |
78 |
#include "llvm/Pass.h" |
| 79 |
#include "llvm/Support/BlockFrequency.h" |
79 |
#include "llvm/Support/BlockFrequency.h" |
| 80 |
#include "llvm/Support/BranchProbability.h" |
80 |
#include "llvm/Support/BranchProbability.h" |
| 81 |
#include "llvm/Support/Casting.h" |
81 |
#include "llvm/Support/Casting.h" |
| 82 |
#include "llvm/Support/CommandLine.h" |
82 |
#include "llvm/Support/CommandLine.h" |
| 83 |
#include "llvm/Support/Compiler.h" |
83 |
#include "llvm/Support/Compiler.h" |
| 84 |
#include "llvm/Support/Debug.h" |
84 |
#include "llvm/Support/Debug.h" |
| 85 |
#include "llvm/Support/ErrorHandling.h" |
85 |
#include "llvm/Support/ErrorHandling.h" |
| 86 |
#include "llvm/Support/MathExtras.h" |
86 |
#include "llvm/Support/MathExtras.h" |
| 87 |
#include "llvm/Support/raw_ostream.h" |
87 |
#include "llvm/Support/raw_ostream.h" |
| 88 |
#include "llvm/Target/TargetMachine.h" |
88 |
#include "llvm/Target/TargetMachine.h" |
| 89 |
#include "llvm/Target/TargetOptions.h" |
89 |
#include "llvm/Target/TargetOptions.h" |
| 90 |
#include "llvm/Transforms/Utils/BasicBlockUtils.h" |
90 |
#include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 91 |
#include "llvm/Transforms/Utils/BypassSlowDivision.h" |
91 |
#include "llvm/Transforms/Utils/BypassSlowDivision.h" |
| 92 |
#include "llvm/Transforms/Utils/Local.h" |
92 |
#include "llvm/Transforms/Utils/Local.h" |
| 93 |
#include "llvm/Transforms/Utils/SimplifyLibCalls.h" |
93 |
#include "llvm/Transforms/Utils/SimplifyLibCalls.h" |
| 94 |
#include "llvm/Transforms/Utils/SizeOpts.h" |
94 |
#include "llvm/Transforms/Utils/SizeOpts.h" |
| 95 |
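#include <algorithm> |
95 |
#include <algorithm> |
| 96 |
#include <cassert> |
96 |
#include <cassert> |
| 97 |
#include <cstdint> |
97 |
#include <cstdint> |
| 98 |
#include <iterator> |
98 |
#include <iterator> |
| 99 |
#include <limits> |
99 |
#include <limits> |
| 100 |
#include <memory> |
100 |
#include <memory> |
| 101 |
#include <optional> |
101 |
#include <optional> |
| 102 |
#include <utility> |
102 |
#include <utility> |
| 103 |
#include <vector> |
103 |
#include <vector> |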
| 104 |
|
104 |
|
| 105 |
using namespace llvm; |
105 |
using namespace llvm; |
| 106 |
using namespace llvm::PatternMatch; |
106 |
using namespace llvm::PatternMatch; |
| 107 |
|
107 |
|
| 108 |
#define DEBUG_TYPE "codegenprepare" |
108 |
#define DEBUG_TYPE "codegenprepare" |
| 109 |
|
109 |
|
| 110 |
STATISTIC(NumBlocksElim, "Number of blocks eliminated"); |
110 |
STATISTIC(NumBlocksElim, "Number of blocks eliminated"); |
| 111 |
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); |
111 |
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); |
| 112 |
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); |
112 |
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); |
| 113 |
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " |
113 |
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " |
| 114 |
"sunken Cmps"); |
114 |
"sunken Cmps"); |
| 115 |
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " |
115 |
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " |
| 116 |
"of sunken Casts"); |
116 |
"of sunken Casts"); |
| 117 |
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " |
117 |
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " |
| 118 |
"computations were sunk"); |
118 |
"computations were sunk"); |
| 119 |
STATISTIC(NumMemoryInstsPhiCreated, |
119 |
STATISTIC(NumMemoryInstsPhiCreated, |
| 120 |
"Number of phis created when address " |
120 |
"Number of phis created when address " |
| 121 |
"computations were sunk to memory instructions"); |
121 |
"computations were sunk to memory instructions"); |
| 122 |
STATISTIC(NumMemoryInstsSelectCreated, |
122 |
STATISTIC(NumMemoryInstsSelectCreated, |
| 123 |
"Number of select created when address " |
123 |
"Number of select created when address " |
| 124 |
"computations were sunk to memory instructions"); |
124 |
"computations were sunk to memory instructions"); |
| 125 |
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); |
125 |
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); |
| 126 |
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); |
126 |
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); |
| 127 |
STATISTIC(NumAndsAdded, |
127 |
STATISTIC(NumAndsAdded, |
| 128 |
"Number of and mask instructions added to form ext loads"); |
128 |
"Number of and mask instructions added to form ext loads"); |
| 129 |
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); |
129 |
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); |
| 130 |
STATISTIC(NumRetsDup, "Number of return instructions duplicated"); |
130 |
STATISTIC(NumRetsDup, "Number of return instructions duplicated"); |
| 131 |
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); |
131 |
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); |
| 132 |
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); |
132 |
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); |
| 133 |
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); |
133 |
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); |
| 134 |
|
134 |
|
| 135 |
static cl::opt DisableBranchOpts( |
135 |
static cl::opt DisableBranchOpts( |
| 136 |
"disable-cgp-branch-opts", cl::Hidden, cl::init(false), |
136 |
"disable-cgp-branch-opts", cl::Hidden, cl::init(false), |
| 137 |
cl::desc("Disable branch optimizations in CodeGenPrepare")); |
137 |
cl::desc("Disable branch optimizations in CodeGenPrepare")); |
| 138 |
|
138 |
|
| 139 |
static cl::opt |
139 |
static cl::opt |
| 140 |
DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), |
140 |
DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), |
| 141 |
cl::desc("Disable GC optimizations in CodeGenPrepare")); |
141 |
cl::desc("Disable GC optimizations in CodeGenPrepare")); |
| 142 |
|
142 |
|
| 143 |
static cl::opt |
143 |
static cl::opt |
| 144 |
DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, |
144 |
DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, |
| 145 |
cl::init(false), |
145 |
cl::init(false), |
| 146 |
cl::desc("Disable select to branch conversion.")); |
146 |
cl::desc("Disable select to branch conversion.")); |
| 147 |
|
147 |
|
| 148 |
static cl::opt |
148 |
static cl::opt |
| 149 |
AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), |
149 |
AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), |
| 150 |
cl::desc("Address sinking in CGP using GEPs.")); |
150 |
cl::desc("Address sinking in CGP using GEPs.")); |
| 151 |
|
151 |
|
| 152 |
static cl::opt |
152 |
static cl::opt |
| 153 |
EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), |
153 |
EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), |
| 154 |
cl::desc("Enable sinkinig and/cmp into branches.")); |
154 |
cl::desc("Enable sinkinig and/cmp into branches.")); |
| 155 |
|
155 |
|
| 156 |
static cl::opt DisableStoreExtract( |
156 |
static cl::opt DisableStoreExtract( |
| 157 |
"disable-cgp-store-extract", cl::Hidden, cl::init(false), |
157 |
"disable-cgp-store-extract", cl::Hidden, cl::init(false), |
| 158 |
cl::desc("Disable store(extract) optimizations in CodeGenPrepare")); |
158 |
cl::desc("Disable store(extract) optimizations in CodeGenPrepare")); |
| 159 |
|
159 |
|
| 160 |
static cl::opt StressStoreExtract( |
160 |
static cl::opt StressStoreExtract( |
| 161 |
"stress-cgp-store-extract", cl::Hidden, cl::init(false), |
161 |
"stress-cgp-store-extract", cl::Hidden, cl::init(false), |
| 162 |
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); |
162 |
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); |
| 163 |
|
163 |
|
| 164 |
static cl::opt DisableExtLdPromotion( |
164 |
static cl::opt DisableExtLdPromotion( |
| 165 |
"disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), |
165 |
"disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), |
| 166 |
cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " |
166 |
cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " |
| 167 |
"CodeGenPrepare")); |
167 |
"CodeGenPrepare")); |
| 168 |
|
168 |
|
| 169 |
static cl::opt StressExtLdPromotion( |
169 |
static cl::opt StressExtLdPromotion( |
| 170 |
"stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), |
170 |
"stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), |
| 171 |
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " |
171 |
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " |
| 172 |
"optimization in CodeGenPrepare")); |
172 |
"optimization in CodeGenPrepare")); |
| 173 |
|
173 |
|
| 174 |
static cl::opt DisablePreheaderProtect( |
174 |
static cl::opt DisablePreheaderProtect( |
| 175 |
"disable-preheader-prot", cl::Hidden, cl::init(false), |
175 |
"disable-preheader-prot", cl::Hidden, cl::init(false), |
| 176 |
cl::desc("Disable protection against removing loop preheaders")); |
176 |
cl::desc("Disable protection against removing loop preheaders")); |
| 177 |
|
177 |
|
| 178 |
static cl::opt ProfileGuidedSectionPrefix( |
178 |
static cl::opt ProfileGuidedSectionPrefix( |
| 179 |
"profile-guided-section-prefix", cl::Hidden, cl::init(true), |
179 |
"profile-guided-section-prefix", cl::Hidden, cl::init(true), |
| 180 |
cl::desc("Use profile info to add section prefix for hot/cold functions")); |
180 |
cl::desc("Use profile info to add section prefix for hot/cold functions")); |
| 181 |
|
181 |
|
| 182 |
static cl::opt ProfileUnknownInSpecialSection( |
182 |
static cl::opt ProfileUnknownInSpecialSection( |
| 183 |
"profile-unknown-in-special-section", cl::Hidden, |
183 |
"profile-unknown-in-special-section", cl::Hidden, |
| 184 |
cl::desc("In profiling mode like sampleFDO, if a function doesn't have " |
184 |
cl::desc("In profiling mode like sampleFDO, if a function doesn't have " |
| 185 |
"profile, we cannot tell the function is cold for sure because " |
185 |
"profile, we cannot tell the function is cold for sure because " |
| 186 |
"it may be a function newly added without ever being sampled. " |
186 |
"it may be a function newly added without ever being sampled. " |
| 187 |
"With the flag enabled, compiler can put such profile unknown " |
187 |
"With the flag enabled, compiler can put such profile unknown " |
| 188 |
"functions into a special section, so runtime system can choose " |
188 |
"functions into a special section, so runtime system can choose " |
| 189 |
"to handle it in a different way than .text section, to save " |
189 |
"to handle it in a different way than .text section, to save " |
| 190 |
"RAM for example. ")); |
190 |
"RAM for example. ")); |
| 191 |
|
191 |
|
| 192 |
static cl::opt BBSectionsGuidedSectionPrefix( |
192 |
static cl::opt BBSectionsGuidedSectionPrefix( |
| 193 |
"bbsections-guided-section-prefix", cl::Hidden, cl::init(true), |
193 |
"bbsections-guided-section-prefix", cl::Hidden, cl::init(true), |
| 194 |
cl::desc("Use the basic-block-sections profile to determine the text " |
194 |
cl::desc("Use the basic-block-sections profile to determine the text " |
| 195 |
"section prefix for hot functions. Functions with " |
195 |
"section prefix for hot functions. Functions with " |
| 196 |
"basic-block-sections profile will be placed in `.text.hot` " |
196 |
"basic-block-sections profile will be placed in `.text.hot` " |
| 197 |
"regardless of their FDO profile info. Other functions won't be " |
197 |
"regardless of their FDO profile info. Other functions won't be " |
| 198 |
"impacted, i.e., their prefixes will be decided by FDO/sampleFDO " |
198 |
"impacted, i.e., their prefixes will be decided by FDO/sampleFDO " |
| 199 |
"profiles.")); |
199 |
"profiles.")); |
| 200 |
|
200 |
|
| 201 |
static cl::opt FreqRatioToSkipMerge( |
201 |
static cl::opt FreqRatioToSkipMerge( |
| 202 |
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), |
202 |
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), |
| 203 |
cl::desc("Skip merging empty blocks if (frequency of empty block) / " |
203 |
cl::desc("Skip merging empty blocks if (frequency of empty block) / " |
| 204 |
"(frequency of destination block) is greater than this ratio")); |
204 |
"(frequency of destination block) is greater than this ratio")); |
| 205 |
|
205 |
|
| 206 |
static cl::opt ForceSplitStore( |
206 |
static cl::opt ForceSplitStore( |
| 207 |
"force-split-store", cl::Hidden, cl::init(false), |
207 |
"force-split-store", cl::Hidden, cl::init(false), |
| 208 |
cl::desc("Force store splitting no matter what the target query says.")); |
208 |
cl::desc("Force store splitting no matter what the target query says.")); |
| 209 |
|
209 |
|
| 210 |
static cl::opt EnableTypePromotionMerge( |
210 |
static cl::opt EnableTypePromotionMerge( |
| 211 |
"cgp-type-promotion-merge", cl::Hidden, |
211 |
"cgp-type-promotion-merge", cl::Hidden, |
| 212 |
cl::desc("Enable merging of redundant sexts when one is dominating" |
212 |
cl::desc("Enable merging of redundant sexts when one is dominating" |
| 213 |
" the other."), |
213 |
" the other."), |
| 214 |
cl::init(true)); |
214 |
cl::init(true)); |
| 215 |
|
215 |
|
| 216 |
static cl::opt DisableComplexAddrModes( |
216 |
static cl::opt DisableComplexAddrModes( |
| 217 |
"disable-complex-addr-modes", cl::Hidden, cl::init(false), |
217 |
"disable-complex-addr-modes", cl::Hidden, cl::init(false), |
| 218 |
cl::desc("Disables combining addressing modes with different parts " |
218 |
cl::desc("Disables combining addressing modes with different parts " |
| 219 |
"in optimizeMemoryInst.")); |
219 |
"in optimizeMemoryInst.")); |
| 220 |
|
220 |
|
| 221 |
static cl::opt |
221 |
static cl::opt |
| 222 |
AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), |
222 |
AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), |
| 223 |
cl::desc("Allow creation of Phis in Address sinking.")); |
223 |
cl::desc("Allow creation of Phis in Address sinking.")); |
| 224 |
|
224 |
|
| 225 |
static cl::opt AddrSinkNewSelects( |
225 |
static cl::opt AddrSinkNewSelects( |
| 226 |
"addr-sink-new-select", cl::Hidden, cl::init(true), |
226 |
"addr-sink-new-select", cl::Hidden, cl::init(true), |
| 227 |
cl::desc("Allow creation of selects in Address sinking.")); |
227 |
cl::desc("Allow creation of selects in Address sinking.")); |
| 228 |
|
228 |
|
| 229 |
static cl::opt AddrSinkCombineBaseReg( |
229 |
static cl::opt AddrSinkCombineBaseReg( |
| 230 |
"addr-sink-combine-base-reg", cl::Hidden, cl::init(true), |
230 |
"addr-sink-combine-base-reg", cl::Hidden, cl::init(true), |
| 231 |
cl::desc("Allow combining of BaseReg field in Address sinking.")); |
231 |
cl::desc("Allow combining of BaseReg field in Address sinking.")); |
| 232 |
|
232 |
|
| 233 |
static cl::opt AddrSinkCombineBaseGV( |
233 |
static cl::opt AddrSinkCombineBaseGV( |
| 234 |
"addr-sink-combine-base-gv", cl::Hidden, cl::init(true), |
234 |
"addr-sink-combine-base-gv", cl::Hidden, cl::init(true), |
| 235 |
cl::desc("Allow combining of BaseGV field in Address sinking.")); |
235 |
cl::desc("Allow combining of BaseGV field in Address sinking.")); |
| 236 |
|
236 |
|
| 237 |
static cl::opt AddrSinkCombineBaseOffs( |
237 |
static cl::opt AddrSinkCombineBaseOffs( |
| 238 |
"addr-sink-combine-base-offs", cl::Hidden, cl::init(true), |
238 |
"addr-sink-combine-base-offs", cl::Hidden, cl::init(true), |
| 239 |
cl::desc("Allow combining of BaseOffs field in Address sinking.")); |
239 |
cl::desc("Allow combining of BaseOffs field in Address sinking.")); |
| 240 |
|
240 |
|
| 241 |
static cl::opt AddrSinkCombineScaledReg( |
241 |
static cl::opt AddrSinkCombineScaledReg( |
| 242 |
"addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), |
242 |
"addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), |
| 243 |
cl::desc("Allow combining of ScaledReg field in Address sinking.")); |
243 |
cl::desc("Allow combining of ScaledReg field in Address sinking.")); |
| 244 |
|
244 |
|
| 245 |
static cl::opt |
245 |
static cl::opt |
| 246 |
EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, |
246 |
EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, |
| 247 |
cl::init(true), |
247 |
cl::init(true), |
| 248 |
cl::desc("Enable splitting large offset of GEP.")); |
248 |
cl::desc("Enable splitting large offset of GEP.")); |
| 249 |
|
249 |
|
| 250 |
static cl::opt EnableICMP_EQToICMP_ST( |
250 |
static cl::opt EnableICMP_EQToICMP_ST( |
| 251 |
"cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), |
251 |
"cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), |
| 252 |
cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); |
252 |
cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); |
| 253 |
|
253 |
|
| 254 |
static cl::opt |
254 |
static cl::opt |
| 255 |
VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), |
255 |
VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), |
| 256 |
cl::desc("Enable BFI update verification for " |
256 |
cl::desc("Enable BFI update verification for " |
| 257 |
"CodeGenPrepare.")); |
257 |
"CodeGenPrepare.")); |
| 258 |
|
258 |
|
| 259 |
static cl::opt |
259 |
static cl::opt |
| 260 |
OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), |
260 |
OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), |
| 261 |
cl::desc("Enable converting phi types in CodeGenPrepare")); |
261 |
cl::desc("Enable converting phi types in CodeGenPrepare")); |
| 262 |
|
262 |
|
| 263 |
static cl::opt |
263 |
static cl::opt |
| 264 |
HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, |
264 |
HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, |
| 265 |
cl::desc("Least BB number of huge function.")); |
265 |
cl::desc("Least BB number of huge function.")); |
| 266 |
|
266 |
|
| 267 |
static cl::opt |
267 |
static cl::opt |
| 268 |
MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), |
268 |
MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), |
| 269 |
cl::Hidden, |
269 |
cl::Hidden, |
| 270 |
cl::desc("Max number of address users to look at")); |
270 |
cl::desc("Max number of address users to look at")); |
| 271 |
namespace { |
271 |
namespace { |
| 272 |
|
272 |
|
| 273 |
enum ExtType { |
273 |
enum ExtType { |
| 274 |
ZeroExtension, // Zero extension has been seen. |
274 |
ZeroExtension, // Zero extension has been seen. |
| 275 |
SignExtension, // Sign extension has been seen. |
275 |
SignExtension, // Sign extension has been seen. |
| 276 |
BothExtension // This extension type is used if we saw sext after |
276 |
BothExtension // This extension type is used if we saw sext after |
| 277 |
// ZeroExtension had been set, or if we saw zext after |
277 |
// ZeroExtension had been set, or if we saw zext after |
| 278 |
// SignExtension had been set. It makes the type |
278 |
// SignExtension had been set. It makes the type |
| 279 |
// information of a promoted instruction invalid. |
279 |
// information of a promoted instruction invalid. |
| 280 |
}; |
280 |
}; |
| 281 |
|
281 |
|
| 282 |
enum ModifyDT { |
282 |
enum ModifyDT { |
| 283 |
NotModifyDT, // Does not modify any DT. |
283 |
NotModifyDT, // Does not modify any DT. |
| 284 |
ModifyBBDT, // Modify the Basic Block Dominator Tree. |
284 |
ModifyBBDT, // Modify the Basic Block Dominator Tree. |
| 285 |
ModifyInstDT // Modify the Instruction Dominator in a Basic Block, |
285 |
ModifyInstDT // Modify the Instruction Dominator in a Basic Block, |
| 286 |
// This usually means we move/delete/insert instruction |
286 |
// This usually means we move/delete/insert instruction |
| 287 |
// in a Basic Block. So we should re-iterate instructions |
287 |
// in a Basic Block. So we should re-iterate instructions |
| 288 |
// in such Basic Block. |
288 |
// in such Basic Block. |
| 289 |
}; |
289 |
}; |
| 290 |
|
290 |
|
| 291 |
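using SetOfInstrs = SmallPtrSet<Instruction *, 16>; |
291 |
using SetOfInstrs = SmallPtrSet<Instruction *, 16>; |
| 292 |
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>; |
292 |
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>; |
| 293 |
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; |
293 |
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; |
| 294 |
using SExts = SmallVector<Instruction *, 16>; |
294 |
using SExts = SmallVector<Instruction *, 16>; |
| 295 |
using ValueToSExts = MapVector<Value *, SExts>; |
295 |
using ValueToSExts = MapVector<Value *, SExts>; |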
| 296 |
|
296 |
|
| 297 |
class TypePromotionTransaction; |
297 |
class TypePromotionTransaction; |
| 298 |
|
298 |
|
| 299 |
class CodeGenPrepare : public FunctionPass { |
299 |
class CodeGenPrepare : public FunctionPass { |
| 300 |
const TargetMachine *TM = nullptr; |
300 |
const TargetMachine *TM = nullptr; |
| 301 |
const TargetSubtargetInfo *SubtargetInfo = nullptr; |
301 |
const TargetSubtargetInfo *SubtargetInfo = nullptr; |
| 302 |
const TargetLowering *TLI = nullptr; |
302 |
const TargetLowering *TLI = nullptr; |
| 303 |
const TargetRegisterInfo *TRI = nullptr; |
303 |
const TargetRegisterInfo *TRI = nullptr; |
| 304 |
const TargetTransformInfo *TTI = nullptr; |
304 |
const TargetTransformInfo *TTI = nullptr; |
| 305 |
const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; |
305 |
const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; |
| 306 |
const TargetLibraryInfo *TLInfo = nullptr; |
306 |
const TargetLibraryInfo *TLInfo = nullptr; |
| 307 |
LoopInfo *LI = nullptr; |
307 |
LoopInfo *LI = nullptr; |
| 308 |
std::unique_ptr<BlockFrequencyInfo> BFI; |
308 |
std::unique_ptr<BlockFrequencyInfo> BFI; |
| 309 |
std::unique_ptr<BranchProbabilityInfo> BPI; |
309 |
std::unique_ptr<BranchProbabilityInfo> BPI; |
| 310 |
ProfileSummaryInfo *PSI = nullptr; |
310 |
ProfileSummaryInfo *PSI = nullptr; |
| 311 |
|
311 |
|
| 312 |
/// As we scan instructions optimizing them, this is the next instruction |
312 |
/// As we scan instructions optimizing them, this is the next instruction |
| 313 |
/// to optimize. Transforms that can invalidate this should update it. |
313 |
/// to optimize. Transforms that can invalidate this should update it. |
| 314 |
BasicBlock::iterator CurInstIterator; |
314 |
BasicBlock::iterator CurInstIterator; |
| 315 |
|
315 |
|
| 316 |
/// Keeps track of non-local addresses that have been sunk into a block. |
316 |
/// Keeps track of non-local addresses that have been sunk into a block. |
| 317 |
/// This allows us to avoid inserting duplicate code for blocks with |
317 |
/// This allows us to avoid inserting duplicate code for blocks with |
| 318 |
/// multiple load/stores of the same address. The usage of WeakTrackingVH |
318 |
/// multiple load/stores of the same address. The usage of WeakTrackingVH |
| 319 |
/// enables SunkAddrs to be treated as a cache whose entries can be |
319 |
/// enables SunkAddrs to be treated as a cache whose entries can be |
| 320 |
/// invalidated if a sunken address computation has been erased. |
320 |
/// invalidated if a sunken address computation has been erased. |
| 321 |
ValueMap<Value *, WeakTrackingVH> SunkAddrs; |
321 |
ValueMap<Value *, WeakTrackingVH> SunkAddrs; |
| 322 |
|
322 |
|
| 323 |
/// Keeps track of all instructions inserted for the current function. |
323 |
/// Keeps track of all instructions inserted for the current function. |
| 324 |
SetOfInstrs InsertedInsts; |
324 |
SetOfInstrs InsertedInsts; |
| 325 |
|
325 |
|
| 326 |
/// Keeps track of the type of the related instruction before their |
326 |
/// Keeps track of the type of the related instruction before their |
| 327 |
/// promotion for the current function. |
327 |
/// promotion for the current function. |
| 328 |
InstrToOrigTy PromotedInsts; |
328 |
InstrToOrigTy PromotedInsts; |
| 329 |
|
329 |
|
| 330 |
/// Keep track of instructions removed during promotion. |
330 |
/// Keep track of instructions removed during promotion. |
| 331 |
SetOfInstrs RemovedInsts; |
331 |
SetOfInstrs RemovedInsts; |
| 332 |
|
332 |
|
| 333 |
/// Keep track of sext chains based on their initial value. |
333 |
/// Keep track of sext chains based on their initial value. |
| 334 |
DenseMap<Value *, Instruction *> SeenChainsForSExt; |
334 |
DenseMap<Value *, Instruction *> SeenChainsForSExt; |
| 335 |
|
335 |
|
| 336 |
/// Keep track of GEPs accessing the same data structures such as structs or |
336 |
/// Keep track of GEPs accessing the same data structures such as structs or |
| 337 |
/// arrays that are candidates to be split later because of their large |
337 |
/// arrays that are candidates to be split later because of their large |
| 338 |
/// size. |
338 |
/// size. |
| 339 |
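MapVector<AssertingVH<Value>, |
339 |
MapVector<AssertingVH<Value>, |
| 340 |
SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> |
340 |
SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> |
| 341 |
LargeOffsetGEPMap; |
341 |
LargeOffsetGEPMap; |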
| 342 |
|
342 |
|
| 343 |
/// Keep track of new GEP base after splitting the GEPs having large offset. |
343 |
/// Keep track of new GEP base after splitting the GEPs having large offset. |
| 344 |
SmallSet, 2> NewGEPBases; |
344 |
SmallSet, 2> NewGEPBases; |
| 345 |
|
345 |
|
| 346 |
/// Map serial numbers to Large offset GEPs. |
346 |
/// Map serial numbers to Large offset GEPs. |
| 347 |
DenseMap, int> LargeOffsetGEPID; |
347 |
DenseMap, int> LargeOffsetGEPID; |
| 348 |
|
348 |
|
| 349 |
/// Keep track of SExt promoted. |
349 |
/// Keep track of SExt promoted. |
| 350 |
ValueToSExts ValToSExtendedUses; |
350 |
ValueToSExts ValToSExtendedUses; |
| 351 |
|
351 |
|
| 352 |
/// True if the function has the OptSize attribute. |
352 |
/// True if the function has the OptSize attribute. |
| 353 |
bool OptSize; |
353 |
bool OptSize; |
| 354 |
|
354 |
|
| 355 |
/// DataLayout for the Function being processed. |
355 |
/// DataLayout for the Function being processed. |
| 356 |
const DataLayout *DL = nullptr; |
356 |
const DataLayout *DL = nullptr; |
| 357 |
|
357 |
|
| 358 |
/// Building the dominator tree can be expensive, so we only build it |
358 |
/// Building the dominator tree can be expensive, so we only build it |
| 359 |
/// lazily and update it when required. |
359 |
/// lazily and update it when required. |
| 360 |
std::unique_ptr DT; |
360 |
std::unique_ptr DT; |
| 361 |
|
361 |
|
| 362 |
public: |
362 |
public: |
| 363 |
/// If encounter huge function, we need to limit the build time. |
363 |
/// If encounter huge function, we need to limit the build time. |
| 364 |
bool IsHugeFunc = false; |
364 |
bool IsHugeFunc = false; |
| 365 |
|
365 |
|
| 366 |
/// FreshBBs is like worklist, it collected the updated BBs which need |
366 |
/// FreshBBs is like worklist, it collected the updated BBs which need |
| 367 |
/// to be optimized again. |
367 |
/// to be optimized again. |
| 368 |
/// Note: Consider building time in this pass, when a BB updated, we need |
368 |
/// Note: Consider building time in this pass, when a BB updated, we need |
| 369 |
/// to insert such BB into FreshBBs for huge function. |
369 |
/// to insert such BB into FreshBBs for huge function. |
| 370 |
SmallSet FreshBBs; |
370 |
SmallSet FreshBBs; |
| 371 |
|
371 |
|
| 372 |
static char ID; // Pass identification, replacement for typeid |
372 |
static char ID; // Pass identification, replacement for typeid |
| 373 |
|
373 |
|
| 374 |
CodeGenPrepare() : FunctionPass(ID) { |
374 |
CodeGenPrepare() : FunctionPass(ID) { |
| 375 |
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); |
375 |
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); |
| 376 |
} |
376 |
} |
| 377 |
|
377 |
|
| 378 |
bool runOnFunction(Function &F) override; |
378 |
bool runOnFunction(Function &F) override; |
| 379 |
|
379 |
|
| 380 |
void releaseMemory() override { |
380 |
void releaseMemory() override { |
| 381 |
// Clear per function information. |
381 |
// Clear per function information. |
| 382 |
InsertedInsts.clear(); |
382 |
InsertedInsts.clear(); |
| 383 |
PromotedInsts.clear(); |
383 |
PromotedInsts.clear(); |
| 384 |
FreshBBs.clear(); |
384 |
FreshBBs.clear(); |
| 385 |
BPI.reset(); |
385 |
BPI.reset(); |
| 386 |
BFI.reset(); |
386 |
BFI.reset(); |
| 387 |
} |
387 |
} |
| 388 |
|
388 |
|
| 389 |
StringRef getPassName() const override { return "CodeGen Prepare"; } |
389 |
StringRef getPassName() const override { return "CodeGen Prepare"; } |
| 390 |
|
390 |
|
| 391 |
void getAnalysisUsage(AnalysisUsage &AU) const override { |
391 |
void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 392 |
// FIXME: When we can selectively preserve passes, preserve the domtree. |
392 |
// FIXME: When we can selectively preserve passes, preserve the domtree. |
| 393 |
AU.addRequired(); |
393 |
AU.addRequired(); |
| 394 |
AU.addRequired(); |
394 |
AU.addRequired(); |
| 395 |
AU.addRequired(); |
395 |
AU.addRequired(); |
| 396 |
AU.addRequired(); |
396 |
AU.addRequired(); |
| 397 |
AU.addRequired(); |
397 |
AU.addRequired(); |
| 398 |
AU.addUsedIfAvailable(); |
398 |
AU.addUsedIfAvailable(); |
| 399 |
} |
399 |
} |
| 400 |
|
400 |
|
| 401 |
private: |
401 |
private: |
| 402 |
template |
402 |
template |
| 403 |
void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) { |
403 |
void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) { |
| 404 |
// Substituting can cause recursive simplifications, which can invalidate |
404 |
// Substituting can cause recursive simplifications, which can invalidate |
| 405 |
// our iterator. Use a WeakTrackingVH to hold onto it in case this |
405 |
// our iterator. Use a WeakTrackingVH to hold onto it in case this |
| 406 |
// happens. |
406 |
// happens. |
| 407 |
Value *CurValue = &*CurInstIterator; |
407 |
Value *CurValue = &*CurInstIterator; |
| 408 |
WeakTrackingVH IterHandle(CurValue); |
408 |
WeakTrackingVH IterHandle(CurValue); |
| 409 |
|
409 |
|
| 410 |
f(); |
410 |
f(); |
| 411 |
|
411 |
|
| 412 |
// If the iterator instruction was recursively deleted, start over at the |
412 |
// If the iterator instruction was recursively deleted, start over at the |
| 413 |
// start of the block. |
413 |
// start of the block. |
| 414 |
if (IterHandle != CurValue) { |
414 |
if (IterHandle != CurValue) { |
| 415 |
CurInstIterator = BB->begin(); |
415 |
CurInstIterator = BB->begin(); |
| 416 |
SunkAddrs.clear(); |
416 |
SunkAddrs.clear(); |
| 417 |
} |
417 |
} |
| 418 |
} |
418 |
} |
| 419 |
|
419 |
|
| 420 |
// Get the DominatorTree, building if necessary. |
420 |
// Get the DominatorTree, building if necessary. |
| 421 |
DominatorTree &getDT(Function &F) { |
421 |
DominatorTree &getDT(Function &F) { |
| 422 |
if (!DT) |
422 |
if (!DT) |
| 423 |
DT = std::make_unique(F); |
423 |
DT = std::make_unique(F); |
| 424 |
return *DT; |
424 |
return *DT; |
| 425 |
} |
425 |
} |
| 426 |
|
426 |
|
| 427 |
void removeAllAssertingVHReferences(Value *V); |
427 |
void removeAllAssertingVHReferences(Value *V); |
| 428 |
bool eliminateAssumptions(Function &F); |
428 |
bool eliminateAssumptions(Function &F); |
| 429 |
bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr); |
429 |
bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr); |
| 430 |
bool eliminateMostlyEmptyBlocks(Function &F); |
430 |
bool eliminateMostlyEmptyBlocks(Function &F); |
| 431 |
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); |
431 |
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); |
| 432 |
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; |
432 |
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; |
| 433 |
void eliminateMostlyEmptyBlock(BasicBlock *BB); |
433 |
void eliminateMostlyEmptyBlock(BasicBlock *BB); |
| 434 |
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, |
434 |
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, |
| 435 |
bool isPreheader); |
435 |
bool isPreheader); |
| 436 |
bool makeBitReverse(Instruction &I); |
436 |
bool makeBitReverse(Instruction &I); |
| 437 |
bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT); |
437 |
bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT); |
| 438 |
bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT); |
438 |
bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT); |
| 439 |
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, |
439 |
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, |
| 440 |
unsigned AddrSpace); |
440 |
unsigned AddrSpace); |
| 441 |
bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); |
441 |
bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); |
| 442 |
bool optimizeInlineAsmInst(CallInst *CS); |
442 |
bool optimizeInlineAsmInst(CallInst *CS); |
| 443 |
bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT); |
443 |
bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT); |
| 444 |
bool optimizeExt(Instruction *&I); |
444 |
bool optimizeExt(Instruction *&I); |
| 445 |
bool optimizeExtUses(Instruction *I); |
445 |
bool optimizeExtUses(Instruction *I); |
| 446 |
bool optimizeLoadExt(LoadInst *Load); |
446 |
bool optimizeLoadExt(LoadInst *Load); |
| 447 |
bool optimizeShiftInst(BinaryOperator *BO); |
447 |
bool optimizeShiftInst(BinaryOperator *BO); |
| 448 |
bool optimizeFunnelShift(IntrinsicInst *Fsh); |
448 |
bool optimizeFunnelShift(IntrinsicInst *Fsh); |
| 449 |
bool optimizeSelectInst(SelectInst *SI); |
449 |
bool optimizeSelectInst(SelectInst *SI); |
| 450 |
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); |
450 |
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); |
| 451 |
bool optimizeSwitchType(SwitchInst *SI); |
451 |
bool optimizeSwitchType(SwitchInst *SI); |
| 452 |
bool optimizeSwitchPhiConstants(SwitchInst *SI); |
452 |
bool optimizeSwitchPhiConstants(SwitchInst *SI); |
| 453 |
bool optimizeSwitchInst(SwitchInst *SI); |
453 |
bool optimizeSwitchInst(SwitchInst *SI); |
| 454 |
bool optimizeExtractElementInst(Instruction *Inst); |
454 |
bool optimizeExtractElementInst(Instruction *Inst); |
| 455 |
bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT); |
455 |
bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT); |
| 456 |
bool fixupDbgValue(Instruction *I); |
456 |
bool fixupDbgValue(Instruction *I); |
| 457 |
bool placeDbgValues(Function &F); |
457 |
bool placeDbgValues(Function &F); |
| 458 |
bool placePseudoProbes(Function &F); |
458 |
bool placePseudoProbes(Function &F); |
| 459 |
bool canFormExtLd(const SmallVectorImpl &MovedExts, |
459 |
bool canFormExtLd(const SmallVectorImpl &MovedExts, |
| 460 |
LoadInst *&LI, Instruction *&Inst, bool HasPromoted); |
460 |
LoadInst *&LI, Instruction *&Inst, bool HasPromoted); |
| 461 |
bool tryToPromoteExts(TypePromotionTransaction &TPT, |
461 |
bool tryToPromoteExts(TypePromotionTransaction &TPT, |
| 462 |
const SmallVectorImpl &Exts, |
462 |
const SmallVectorImpl &Exts, |
| 463 |
SmallVectorImpl &ProfitablyMovedExts, |
463 |
SmallVectorImpl &ProfitablyMovedExts, |
| 464 |
unsigned CreatedInstsCost = 0); |
464 |
unsigned CreatedInstsCost = 0); |
| 465 |
bool mergeSExts(Function &F); |
465 |
bool mergeSExts(Function &F); |
| 466 |
bool splitLargeGEPOffsets(); |
466 |
bool splitLargeGEPOffsets(); |
| 467 |
bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl &Visited, |
467 |
bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl &Visited, |
| 468 |
SmallPtrSetImpl &DeletedInstrs); |
468 |
SmallPtrSetImpl &DeletedInstrs); |
| 469 |
bool optimizePhiTypes(Function &F); |
469 |
bool optimizePhiTypes(Function &F); |
| 470 |
bool performAddressTypePromotion( |
470 |
bool performAddressTypePromotion( |
| 471 |
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, |
471 |
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, |
| 472 |
bool HasPromoted, TypePromotionTransaction &TPT, |
472 |
bool HasPromoted, TypePromotionTransaction &TPT, |
| 473 |
SmallVectorImpl &SpeculativelyMovedExts); |
473 |
SmallVectorImpl &SpeculativelyMovedExts); |
| 474 |
bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT); |
474 |
bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT); |
| 475 |
bool simplifyOffsetableRelocate(GCStatepointInst &I); |
475 |
bool simplifyOffsetableRelocate(GCStatepointInst &I); |
| 476 |
|
476 |
|
| 477 |
bool tryToSinkFreeOperands(Instruction *I); |
477 |
bool tryToSinkFreeOperands(Instruction *I); |
| 478 |
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1, |
478 |
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1, |
| 479 |
CmpInst *Cmp, Intrinsic::ID IID); |
479 |
CmpInst *Cmp, Intrinsic::ID IID); |
| 480 |
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT); |
480 |
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT); |
| 481 |
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); |
481 |
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); |
| 482 |
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); |
482 |
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); |
| 483 |
void verifyBFIUpdates(Function &F); |
483 |
void verifyBFIUpdates(Function &F); |
| 484 |
}; |
484 |
}; |
| 485 |
|
485 |
|
| 486 |
} // end anonymous namespace |
486 |
} // end anonymous namespace |
| 487 |
|
487 |
|
| 488 |
char CodeGenPrepare::ID = 0; |
488 |
char CodeGenPrepare::ID = 0; |
| 489 |
|
489 |
|
| 490 |
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, |
490 |
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, |
| 491 |
"Optimize for code generation", false, false) |
491 |
"Optimize for code generation", false, false) |
| 492 |
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) |
492 |
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) |
| 493 |
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
493 |
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 494 |
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
494 |
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
| 495 |
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
495 |
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
| 496 |
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
496 |
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
| 497 |
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
497 |
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
| 498 |
INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", |
498 |
INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", |
| 499 |
false, false) |
499 |
false, false) |
| 500 |
|
500 |
|
| 501 |
FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } |
501 |
FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } |
| 502 |
|
502 |
|
| 503 |
bool CodeGenPrepare::runOnFunction(Function &F) { |
503 |
bool CodeGenPrepare::runOnFunction(Function &F) { |
| 504 |
if (skipFunction(F)) |
504 |
if (skipFunction(F)) |
| 505 |
return false; |
505 |
return false; |
| 506 |
|
506 |
|
| 507 |
DL = &F.getParent()->getDataLayout(); |
507 |
DL = &F.getParent()->getDataLayout(); |
| 508 |
|
508 |
|
| 509 |
bool EverMadeChange = false; |
509 |
bool EverMadeChange = false; |
| 510 |
|
510 |
|
| 511 |
TM = &getAnalysis().getTM(); |
511 |
TM = &getAnalysis().getTM(); |
| 512 |
SubtargetInfo = TM->getSubtargetImpl(F); |
512 |
SubtargetInfo = TM->getSubtargetImpl(F); |
| 513 |
TLI = SubtargetInfo->getTargetLowering(); |
513 |
TLI = SubtargetInfo->getTargetLowering(); |
| 514 |
TRI = SubtargetInfo->getRegisterInfo(); |
514 |
TRI = SubtargetInfo->getRegisterInfo(); |
| 515 |
TLInfo = &getAnalysis().getTLI(F); |
515 |
TLInfo = &getAnalysis().getTLI(F); |
| 516 |
TTI = &getAnalysis().getTTI(F); |
516 |
TTI = &getAnalysis().getTTI(F); |
| 517 |
LI = &getAnalysis().getLoopInfo(); |
517 |
LI = &getAnalysis().getLoopInfo(); |
| 518 |
BPI.reset(new BranchProbabilityInfo(F, *LI)); |
518 |
BPI.reset(new BranchProbabilityInfo(F, *LI)); |
| 519 |
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); |
519 |
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); |
| 520 |
PSI = &getAnalysis().getPSI(); |
520 |
PSI = &getAnalysis().getPSI(); |
| 521 |
BBSectionsProfileReader = |
521 |
BBSectionsProfileReader = |
| 522 |
getAnalysisIfAvailable(); |
522 |
getAnalysisIfAvailable(); |
| 523 |
OptSize = F.hasOptSize(); |
523 |
OptSize = F.hasOptSize(); |
| 524 |
// Use the basic-block-sections profile to promote hot functions to .text.hot |
524 |
// Use the basic-block-sections profile to promote hot functions to .text.hot |
| 525 |
// if requested. |
525 |
// if requested. |
| 526 |
if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && |
526 |
if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && |
| 527 |
BBSectionsProfileReader->isFunctionHot(F.getName())) { |
527 |
BBSectionsProfileReader->isFunctionHot(F.getName())) { |
| 528 |
F.setSectionPrefix("hot"); |
528 |
F.setSectionPrefix("hot"); |
| 529 |
} else if (ProfileGuidedSectionPrefix) { |
529 |
} else if (ProfileGuidedSectionPrefix) { |
| 530 |
// The hot attribute overwrites profile count based hotness while profile |
530 |
// The hot attribute overwrites profile count based hotness while profile |
| 531 |
// counts based hotness overwrite the cold attribute. |
531 |
// counts based hotness overwrite the cold attribute. |
| 532 |
// This is a conservative behabvior. |
532 |
// This is a conservative behabvior. |
| 533 |
if (F.hasFnAttribute(Attribute::Hot) || |
533 |
if (F.hasFnAttribute(Attribute::Hot) || |
| 534 |
PSI->isFunctionHotInCallGraph(&F, *BFI)) |
534 |
PSI->isFunctionHotInCallGraph(&F, *BFI)) |
| 535 |
F.setSectionPrefix("hot"); |
535 |
F.setSectionPrefix("hot"); |
| 536 |
// If PSI shows this function is not hot, we will placed the function |
536 |
// If PSI shows this function is not hot, we will placed the function |
| 537 |
// into unlikely section if (1) PSI shows this is a cold function, or |
537 |
// into unlikely section if (1) PSI shows this is a cold function, or |
| 538 |
// (2) the function has a attribute of cold. |
538 |
// (2) the function has a attribute of cold. |
| 539 |
else if (PSI->isFunctionColdInCallGraph(&F, *BFI) || |
539 |
else if (PSI->isFunctionColdInCallGraph(&F, *BFI) || |
| 540 |
F.hasFnAttribute(Attribute::Cold)) |
540 |
F.hasFnAttribute(Attribute::Cold)) |
| 541 |
F.setSectionPrefix("unlikely"); |
541 |
F.setSectionPrefix("unlikely"); |
| 542 |
else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() && |
542 |
else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() && |
| 543 |
PSI->isFunctionHotnessUnknown(F)) |
543 |
PSI->isFunctionHotnessUnknown(F)) |
| 544 |
F.setSectionPrefix("unknown"); |
544 |
F.setSectionPrefix("unknown"); |
| 545 |
} |
545 |
} |
| 546 |
|
546 |
|
| 547 |
/// This optimization identifies DIV instructions that can be |
547 |
/// This optimization identifies DIV instructions that can be |
| 548 |
/// profitably bypassed and carried out with a shorter, faster divide. |
548 |
/// profitably bypassed and carried out with a shorter, faster divide. |
| 549 |
if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) { |
549 |
if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) { |
| 550 |
const DenseMap &BypassWidths = |
550 |
const DenseMap &BypassWidths = |
| 551 |
TLI->getBypassSlowDivWidths(); |
551 |
TLI->getBypassSlowDivWidths(); |
| 552 |
BasicBlock *BB = &*F.begin(); |
552 |
BasicBlock *BB = &*F.begin(); |
| 553 |
while (BB != nullptr) { |
553 |
while (BB != nullptr) { |
| 554 |
// bypassSlowDivision may create new BBs, but we don't want to reapply the |
554 |
// bypassSlowDivision may create new BBs, but we don't want to reapply the |
| 555 |
// optimization to those blocks. |
555 |
// optimization to those blocks. |
| 556 |
BasicBlock *Next = BB->getNextNode(); |
556 |
BasicBlock *Next = BB->getNextNode(); |
| 557 |
// F.hasOptSize is already checked in the outer if statement. |
557 |
// F.hasOptSize is already checked in the outer if statement. |
| 558 |
if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) |
558 |
if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) |
| 559 |
EverMadeChange |= bypassSlowDivision(BB, BypassWidths); |
559 |
EverMadeChange |= bypassSlowDivision(BB, BypassWidths); |
| 560 |
BB = Next; |
560 |
BB = Next; |
| 561 |
} |
561 |
} |
| 562 |
} |
562 |
} |
| 563 |
|
563 |
|
| 564 |
// Get rid of @llvm.assume builtins before attempting to eliminate empty |
564 |
// Get rid of @llvm.assume builtins before attempting to eliminate empty |
| 565 |
// blocks, since there might be blocks that only contain @llvm.assume calls |
565 |
// blocks, since there might be blocks that only contain @llvm.assume calls |
| 566 |
// (plus arguments that we can get rid of). |
566 |
// (plus arguments that we can get rid of). |
| 567 |
EverMadeChange |= eliminateAssumptions(F); |
567 |
EverMadeChange |= eliminateAssumptions(F); |
| 568 |
|
568 |
|
| 569 |
// Eliminate blocks that contain only PHI nodes and an |
569 |
// Eliminate blocks that contain only PHI nodes and an |
| 570 |
// unconditional branch. |
570 |
// unconditional branch. |
| 571 |
EverMadeChange |= eliminateMostlyEmptyBlocks(F); |
571 |
EverMadeChange |= eliminateMostlyEmptyBlocks(F); |
| 572 |
|
572 |
|
| 573 |
ModifyDT ModifiedDT = ModifyDT::NotModifyDT; |
573 |
ModifyDT ModifiedDT = ModifyDT::NotModifyDT; |
| 574 |
if (!DisableBranchOpts) |
574 |
if (!DisableBranchOpts) |
| 575 |
EverMadeChange |= splitBranchCondition(F, ModifiedDT); |
575 |
EverMadeChange |= splitBranchCondition(F, ModifiedDT); |
| 576 |
|
576 |
|
| 577 |
// Split some critical edges where one of the sources is an indirect branch, |
577 |
// Split some critical edges where one of the sources is an indirect branch, |
| 578 |
// to help generate sane code for PHIs involving such edges. |
578 |
// to help generate sane code for PHIs involving such edges. |
| 579 |
EverMadeChange |= |
579 |
EverMadeChange |= |
| 580 |
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); |
580 |
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); |
| 581 |
|
581 |
|
| 582 |
// If we are optimzing huge function, we need to consider the build time. |
582 |
// If we are optimzing huge function, we need to consider the build time. |
| 583 |
// Because the basic algorithm's complex is near O(N!). |
583 |
// Because the basic algorithm's complex is near O(N!). |
| 584 |
IsHugeFunc = F.size() > HugeFuncThresholdInCGPP; |
584 |
IsHugeFunc = F.size() > HugeFuncThresholdInCGPP; |
| 585 |
|
585 |
|
| 586 |
// Transformations above may invalidate dominator tree and/or loop info. |
586 |
// Transformations above may invalidate dominator tree and/or loop info. |
| 587 |
DT.reset(); |
587 |
DT.reset(); |
| 588 |
LI->releaseMemory(); |
588 |
LI->releaseMemory(); |
| 589 |
LI->analyze(getDT(F)); |
589 |
LI->analyze(getDT(F)); |
| 590 |
|
590 |
|
| 591 |
bool MadeChange = true; |
591 |
bool MadeChange = true; |
| 592 |
bool FuncIterated = false; |
592 |
bool FuncIterated = false; |
| 593 |
while (MadeChange) { |
593 |
while (MadeChange) { |
| 594 |
MadeChange = false; |
594 |
MadeChange = false; |
| 595 |
|
595 |
|
| 596 |
for (BasicBlock &BB : llvm::make_early_inc_range(F)) { |
596 |
for (BasicBlock &BB : llvm::make_early_inc_range(F)) { |
| 597 |
if (FuncIterated && !FreshBBs.contains(&BB)) |
597 |
if (FuncIterated && !FreshBBs.contains(&BB)) |
| 598 |
continue; |
598 |
continue; |
| 599 |
|
599 |
|
| 600 |
ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT; |
600 |
ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT; |
| 601 |
bool Changed = optimizeBlock(BB, ModifiedDTOnIteration); |
601 |
bool Changed = optimizeBlock(BB, ModifiedDTOnIteration); |
| 602 |
|
602 |
|
| 603 |
if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT) |
603 |
if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT) |
| 604 |
DT.reset(); |
604 |
DT.reset(); |
| 605 |
|
605 |
|
| 606 |
MadeChange |= Changed; |
606 |
MadeChange |= Changed; |
| 607 |
if (IsHugeFunc) { |
607 |
if (IsHugeFunc) { |
| 608 |
// If the BB is updated, it may still has chance to be optimized. |
608 |
// If the BB is updated, it may still has chance to be optimized. |
| 609 |
// This usually happen at sink optimization. |
609 |
// This usually happen at sink optimization. |
| 610 |
// For example: |
610 |
// For example: |
| 611 |
// |
611 |
// |
| 612 |
// bb0: |
612 |
// bb0: |
| 613 |
// %and = and i32 %a, 4 |
613 |
// %and = and i32 %a, 4 |
| 614 |
// %cmp = icmp eq i32 %and, 0 |
614 |
// %cmp = icmp eq i32 %and, 0 |
| 615 |
// |
615 |
// |
| 616 |
// If the %cmp sink to other BB, the %and will has chance to sink. |
616 |
// If the %cmp sink to other BB, the %and will has chance to sink. |
| 617 |
if (Changed) |
617 |
if (Changed) |
| 618 |
FreshBBs.insert(&BB); |
618 |
FreshBBs.insert(&BB); |
| 619 |
else if (FuncIterated) |
619 |
else if (FuncIterated) |
| 620 |
FreshBBs.erase(&BB); |
620 |
FreshBBs.erase(&BB); |
| 621 |
} else { |
621 |
} else { |
| 622 |
// For small/normal functions, we restart BB iteration if the dominator |
622 |
// For small/normal functions, we restart BB iteration if the dominator |
| 623 |
// tree of the Function was changed. |
623 |
// tree of the Function was changed. |
| 624 |
if (ModifiedDTOnIteration != ModifyDT::NotModifyDT) |
624 |
if (ModifiedDTOnIteration != ModifyDT::NotModifyDT) |
| 625 |
break; |
625 |
break; |
| 626 |
} |
626 |
} |
| 627 |
} |
627 |
} |
| 628 |
// We have iterated all the BB in the (only work for huge) function. |
628 |
// We have iterated all the BB in the (only work for huge) function. |
| 629 |
FuncIterated = IsHugeFunc; |
629 |
FuncIterated = IsHugeFunc; |
| 630 |
|
630 |
|
| 631 |
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) |
631 |
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty()) |
| 632 |
MadeChange |= mergeSExts(F); |
632 |
MadeChange |= mergeSExts(F); |
| 633 |
if (!LargeOffsetGEPMap.empty()) |
633 |
if (!LargeOffsetGEPMap.empty()) |
| 634 |
MadeChange |= splitLargeGEPOffsets(); |
634 |
MadeChange |= splitLargeGEPOffsets(); |
| 635 |
MadeChange |= optimizePhiTypes(F); |
635 |
MadeChange |= optimizePhiTypes(F); |
| 636 |
|
636 |
|
| 637 |
if (MadeChange) |
637 |
if (MadeChange) |
| 638 |
eliminateFallThrough(F, DT.get()); |
638 |
eliminateFallThrough(F, DT.get()); |
| 639 |
|
639 |
|
| 640 |
#ifndef NDEBUG |
640 |
#ifndef NDEBUG |
| 641 |
if (MadeChange && VerifyLoopInfo) |
641 |
if (MadeChange && VerifyLoopInfo) |
| 642 |
LI->verify(getDT(F)); |
642 |
LI->verify(getDT(F)); |
| 643 |
#endif |
643 |
#endif |
| 644 |
|
644 |
|
| 645 |
// Really free removed instructions during promotion. |
645 |
// Really free removed instructions during promotion. |
| 646 |
for (Instruction *I : RemovedInsts) |
646 |
for (Instruction *I : RemovedInsts) |
| 647 |
I->deleteValue(); |
647 |
I->deleteValue(); |
| 648 |
|
648 |
|
| 649 |
EverMadeChange |= MadeChange; |
649 |
EverMadeChange |= MadeChange; |
| 650 |
SeenChainsForSExt.clear(); |
650 |
SeenChainsForSExt.clear(); |
| 651 |
ValToSExtendedUses.clear(); |
651 |
ValToSExtendedUses.clear(); |
| 652 |
RemovedInsts.clear(); |
652 |
RemovedInsts.clear(); |
| 653 |
LargeOffsetGEPMap.clear(); |
653 |
LargeOffsetGEPMap.clear(); |
| 654 |
LargeOffsetGEPID.clear(); |
654 |
LargeOffsetGEPID.clear(); |
| 655 |
} |
655 |
} |
| 656 |
|
656 |
|
| 657 |
NewGEPBases.clear(); |
657 |
NewGEPBases.clear(); |
| 658 |
SunkAddrs.clear(); |
658 |
SunkAddrs.clear(); |
| 659 |
|
659 |
|
| 660 |
if (!DisableBranchOpts) { |
660 |
if (!DisableBranchOpts) { |
| 661 |
MadeChange = false; |
661 |
MadeChange = false; |
| 662 |
// Use a set vector to get deterministic iteration order. The order the |
662 |
// Use a set vector to get deterministic iteration order. The order the |
| 663 |
// blocks are removed may affect whether or not PHI nodes in successors |
663 |
// blocks are removed may affect whether or not PHI nodes in successors |
| 664 |
// are removed. |
664 |
// are removed. |
| 665 |
SmallSetVector WorkList; |
665 |
SmallSetVector WorkList; |
| 666 |
for (BasicBlock &BB : F) { |
666 |
for (BasicBlock &BB : F) { |
| 667 |
SmallVector Successors(successors(&BB)); |
667 |
SmallVector Successors(successors(&BB)); |
| 668 |
MadeChange |= ConstantFoldTerminator(&BB, true); |
668 |
MadeChange |= ConstantFoldTerminator(&BB, true); |
| 669 |
if (!MadeChange) |
669 |
if (!MadeChange) |
| 670 |
continue; |
670 |
continue; |
| 671 |
|
671 |
|
| 672 |
for (BasicBlock *Succ : Successors) |
672 |
for (BasicBlock *Succ : Successors) |
| 673 |
if (pred_empty(Succ)) |
673 |
if (pred_empty(Succ)) |
| 674 |
WorkList.insert(Succ); |
674 |
WorkList.insert(Succ); |
| 675 |
} |
675 |
} |
| 676 |
|
676 |
|
| 677 |
// Delete the dead blocks and any of their dead successors. |
677 |
// Delete the dead blocks and any of their dead successors. |
| 678 |
MadeChange |= !WorkList.empty(); |
678 |
MadeChange |= !WorkList.empty(); |
| 679 |
while (!WorkList.empty()) { |
679 |
while (!WorkList.empty()) { |
| 680 |
BasicBlock *BB = WorkList.pop_back_val(); |
680 |
BasicBlock *BB = WorkList.pop_back_val(); |
| 681 |
SmallVector Successors(successors(BB)); |
681 |
SmallVector Successors(successors(BB)); |
| 682 |
|
682 |
|
| 683 |
DeleteDeadBlock(BB); |
683 |
DeleteDeadBlock(BB); |
| 684 |
|
684 |
|
| 685 |
for (BasicBlock *Succ : Successors) |
685 |
for (BasicBlock *Succ : Successors) |
| 686 |
if (pred_empty(Succ)) |
686 |
if (pred_empty(Succ)) |
| 687 |
WorkList.insert(Succ); |
687 |
WorkList.insert(Succ); |
| 688 |
} |
688 |
} |
| 689 |
|
689 |
|
| 690 |
// Merge pairs of basic blocks with unconditional branches, connected by |
690 |
// Merge pairs of basic blocks with unconditional branches, connected by |
| 691 |
// a single edge. |
691 |
// a single edge. |
| 692 |
if (EverMadeChange || MadeChange) |
692 |
if (EverMadeChange || MadeChange) |
| 693 |
MadeChange |= eliminateFallThrough(F); |
693 |
MadeChange |= eliminateFallThrough(F); |
| 694 |
|
694 |
|
| 695 |
EverMadeChange |= MadeChange; |
695 |
EverMadeChange |= MadeChange; |
| 696 |
} |
696 |
} |
| 697 |
|
697 |
|
| 698 |
if (!DisableGCOpts) { |
698 |
if (!DisableGCOpts) { |
| 699 |
SmallVector Statepoints; |
699 |
SmallVector Statepoints; |
| 700 |
for (BasicBlock &BB : F) |
700 |
for (BasicBlock &BB : F) |
| 701 |
for (Instruction &I : BB) |
701 |
for (Instruction &I : BB) |
| 702 |
if (auto *SP = dyn_cast(&I)) |
702 |
if (auto *SP = dyn_cast(&I)) |
| 703 |
Statepoints.push_back(SP); |
703 |
Statepoints.push_back(SP); |
| 704 |
for (auto &I : Statepoints) |
704 |
for (auto &I : Statepoints) |
| 705 |
EverMadeChange |= simplifyOffsetableRelocate(*I); |
705 |
EverMadeChange |= simplifyOffsetableRelocate(*I); |
| 706 |
} |
706 |
} |
| 707 |
|
707 |
|
| 708 |
// Do this last to clean up use-before-def scenarios introduced by other |
708 |
// Do this last to clean up use-before-def scenarios introduced by other |
| 709 |
// preparatory transforms. |
709 |
// preparatory transforms. |
| 710 |
EverMadeChange |= placeDbgValues(F); |
710 |
EverMadeChange |= placeDbgValues(F); |
| 711 |
EverMadeChange |= placePseudoProbes(F); |
711 |
EverMadeChange |= placePseudoProbes(F); |
| 712 |
|
712 |
|
| 713 |
#ifndef NDEBUG |
713 |
#ifndef NDEBUG |
| 714 |
if (VerifyBFIUpdates) |
714 |
if (VerifyBFIUpdates) |
| 715 |
verifyBFIUpdates(F); |
715 |
verifyBFIUpdates(F); |
| 716 |
#endif |
716 |
#endif |
| 717 |
|
717 |
|
| 718 |
return EverMadeChange; |
718 |
return EverMadeChange; |
| 719 |
} |
719 |
} |
| 720 |
|
720 |
|
| 721 |
bool CodeGenPrepare::eliminateAssumptions(Function &F) { |
721 |
bool CodeGenPrepare::eliminateAssumptions(Function &F) { |
| 722 |
bool MadeChange = false; |
722 |
bool MadeChange = false; |
| 723 |
for (BasicBlock &BB : F) { |
723 |
for (BasicBlock &BB : F) { |
| 724 |
CurInstIterator = BB.begin(); |
724 |
CurInstIterator = BB.begin(); |
| 725 |
while (CurInstIterator != BB.end()) { |
725 |
while (CurInstIterator != BB.end()) { |
| 726 |
Instruction *I = &*(CurInstIterator++); |
726 |
Instruction *I = &*(CurInstIterator++); |
| 727 |
if (auto *Assume = dyn_cast(I)) { |
727 |
if (auto *Assume = dyn_cast(I)) { |
| 728 |
MadeChange = true; |
728 |
MadeChange = true; |
| 729 |
Value *Operand = Assume->getOperand(0); |
729 |
Value *Operand = Assume->getOperand(0); |
| 730 |
Assume->eraseFromParent(); |
730 |
Assume->eraseFromParent(); |
| 731 |
|
731 |
|
| 732 |
resetIteratorIfInvalidatedWhileCalling(&BB, [&]() { |
732 |
resetIteratorIfInvalidatedWhileCalling(&BB, [&]() { |
| 733 |
RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr); |
733 |
RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr); |
| 734 |
}); |
734 |
}); |
| 735 |
} |
735 |
} |
| 736 |
} |
736 |
} |
| 737 |
} |
737 |
} |
| 738 |
return MadeChange; |
738 |
return MadeChange; |
| 739 |
} |
739 |
} |
| 740 |
|
740 |
|
| 741 |
/// An instruction is about to be deleted, so remove all references to it in our |
741 |
/// An instruction is about to be deleted, so remove all references to it in our |
| 742 |
/// GEP-tracking data strcutures. |
742 |
/// GEP-tracking data strcutures. |
| 743 |
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { |
743 |
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) { |
| 744 |
LargeOffsetGEPMap.erase(V); |
744 |
LargeOffsetGEPMap.erase(V); |
| 745 |
NewGEPBases.erase(V); |
745 |
NewGEPBases.erase(V); |
| 746 |
|
746 |
|
| 747 |
auto GEP = dyn_cast(V); |
747 |
auto GEP = dyn_cast(V); |
| 748 |
if (!GEP) |
748 |
if (!GEP) |
| 749 |
return; |
749 |
return; |
| 750 |
|
750 |
|
| 751 |
LargeOffsetGEPID.erase(GEP); |
751 |
LargeOffsetGEPID.erase(GEP); |
| 752 |
|
752 |
|
| 753 |
auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand()); |
753 |
auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand()); |
| 754 |
if (VecI == LargeOffsetGEPMap.end()) |
754 |
if (VecI == LargeOffsetGEPMap.end()) |
| 755 |
return; |
755 |
return; |
| 756 |
|
756 |
|
| 757 |
auto &GEPVector = VecI->second; |
757 |
auto &GEPVector = VecI->second; |
| 758 |
llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); |
758 |
llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; }); |
| 759 |
|
759 |
|
| 760 |
if (GEPVector.empty()) |
760 |
if (GEPVector.empty()) |
| 761 |
LargeOffsetGEPMap.erase(VecI); |
761 |
LargeOffsetGEPMap.erase(VecI); |
| 762 |
} |
762 |
} |
| 763 |
|
763 |
|
| 764 |
// Verify BFI has been updated correctly by recomputing BFI and comparing them. |
764 |
// Verify BFI has been updated correctly by recomputing BFI and comparing them. |
| 765 |
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { |
765 |
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) { |
| 766 |
DominatorTree NewDT(F); |
766 |
DominatorTree NewDT(F); |
| 767 |
LoopInfo NewLI(NewDT); |
767 |
LoopInfo NewLI(NewDT); |
| 768 |
BranchProbabilityInfo NewBPI(F, NewLI, TLInfo); |
768 |
BranchProbabilityInfo NewBPI(F, NewLI, TLInfo); |
| 769 |
BlockFrequencyInfo NewBFI(F, NewBPI, NewLI); |
769 |
BlockFrequencyInfo NewBFI(F, NewBPI, NewLI); |
| 770 |
NewBFI.verifyMatch(*BFI); |
770 |
NewBFI.verifyMatch(*BFI); |
| 771 |
} |
771 |
} |
| 772 |
|
772 |
|
| 773 |
/// Merge basic blocks which are connected by a single edge, where one of the |
773 |
/// Merge basic blocks which are connected by a single edge, where one of the |
| 774 |
/// basic blocks has a single successor pointing to the other basic block, |
774 |
/// basic blocks has a single successor pointing to the other basic block, |
| 775 |
/// which has a single predecessor. |
775 |
/// which has a single predecessor. |
| 776 |
bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) { |
776 |
bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) { |
| 777 |
bool Changed = false; |
777 |
bool Changed = false; |
| 778 |
// Scan all of the blocks in the function, except for the entry block. |
778 |
// Scan all of the blocks in the function, except for the entry block. |
| 779 |
// Use a temporary array to avoid iterator being invalidated when |
779 |
// Use a temporary array to avoid iterator being invalidated when |
| 780 |
// deleting blocks. |
780 |
// deleting blocks. |
| 781 |
SmallVector Blocks; |
781 |
SmallVector Blocks; |
| 782 |
for (auto &Block : llvm::drop_begin(F)) |
782 |
for (auto &Block : llvm::drop_begin(F)) |
| 783 |
Blocks.push_back(&Block); |
783 |
Blocks.push_back(&Block); |
| 784 |
|
784 |
|
| 785 |
SmallSet Preds; |
785 |
SmallSet Preds; |
| 786 |
for (auto &Block : Blocks) { |
786 |
for (auto &Block : Blocks) { |
| 787 |
auto *BB = cast_or_null(Block); |
787 |
auto *BB = cast_or_null(Block); |
| 788 |
if (!BB) |
788 |
if (!BB) |
| 789 |
continue; |
789 |
continue; |
| 790 |
// If the destination block has a single pred, then this is a trivial |
790 |
// If the destination block has a single pred, then this is a trivial |
| 791 |
// edge, just collapse it. |
791 |
// edge, just collapse it. |
| 792 |
BasicBlock *SinglePred = BB->getSinglePredecessor(); |
792 |
BasicBlock *SinglePred = BB->getSinglePredecessor(); |
| 793 |
|
793 |
|
| 794 |
// Don't merge if BB's address is taken. |
794 |
// Don't merge if BB's address is taken. |
| 795 |
if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) |
795 |
if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) |
| 796 |
continue; |
796 |
continue; |
| 797 |
|
797 |
|
| 798 |
// Make an effort to skip unreachable blocks. |
798 |
// Make an effort to skip unreachable blocks. |
| 799 |
if (DT && !DT->isReachableFromEntry(BB)) |
799 |
if (DT && !DT->isReachableFromEntry(BB)) |
| 800 |
continue; |
800 |
continue; |
| 801 |
|
801 |
|
| 802 |
BranchInst *Term = dyn_cast(SinglePred->getTerminator()); |
802 |
BranchInst *Term = dyn_cast(SinglePred->getTerminator()); |
| 803 |
if (Term && !Term->isConditional()) { |
803 |
if (Term && !Term->isConditional()) { |
| 804 |
Changed = true; |
804 |
Changed = true; |
| 805 |
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n"); |
805 |
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n"); |
| 806 |
|
806 |
|
| 807 |
// Merge BB into SinglePred and delete it. |
807 |
// Merge BB into SinglePred and delete it. |
| 808 |
MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr, |
808 |
MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr, |
| 809 |
/* MemDep */ nullptr, |
809 |
/* MemDep */ nullptr, |
| 810 |
/* PredecessorWithTwoSuccessors */ false, DT); |
810 |
/* PredecessorWithTwoSuccessors */ false, DT); |
| 811 |
Preds.insert(SinglePred); |
811 |
Preds.insert(SinglePred); |
| 812 |
|
812 |
|
| 813 |
if (IsHugeFunc) { |
813 |
if (IsHugeFunc) { |
| 814 |
// Update FreshBBs to optimize the merged BB. |
814 |
// Update FreshBBs to optimize the merged BB. |
| 815 |
FreshBBs.insert(SinglePred); |
815 |
FreshBBs.insert(SinglePred); |
| 816 |
FreshBBs.erase(BB); |
816 |
FreshBBs.erase(BB); |
| 817 |
} |
817 |
} |
| 818 |
} |
818 |
} |
| 819 |
} |
819 |
} |
| 820 |
|
820 |
|
| 821 |
// (Repeatedly) merging blocks into their predecessors can create redundant |
821 |
// (Repeatedly) merging blocks into their predecessors can create redundant |
| 822 |
// debug intrinsics. |
822 |
// debug intrinsics. |
| 823 |
for (const auto &Pred : Preds) |
823 |
for (const auto &Pred : Preds) |
| 824 |
if (auto *BB = cast_or_null(Pred)) |
824 |
if (auto *BB = cast_or_null(Pred)) |
| 825 |
RemoveRedundantDbgInstrs(BB); |
825 |
RemoveRedundantDbgInstrs(BB); |
| 826 |
|
826 |
|
| 827 |
return Changed; |
827 |
return Changed; |
| 828 |
} |
828 |
} |
| 829 |
|
829 |
|
| 830 |
/// Find a destination block from BB if BB is mergeable empty block. |
830 |
/// Find a destination block from BB if BB is mergeable empty block. |
| 831 |
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { |
831 |
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { |
| 832 |
// If this block doesn't end with an uncond branch, ignore it. |
832 |
// If this block doesn't end with an uncond branch, ignore it. |
| 833 |
BranchInst *BI = dyn_cast(BB->getTerminator()); |
833 |
BranchInst *BI = dyn_cast(BB->getTerminator()); |
| 834 |
if (!BI || !BI->isUnconditional()) |
834 |
if (!BI || !BI->isUnconditional()) |
| 835 |
return nullptr; |
835 |
return nullptr; |
| 836 |
|
836 |
|
| 837 |
// If the instruction before the branch (skipping debug info) isn't a phi |
837 |
// If the instruction before the branch (skipping debug info) isn't a phi |
| 838 |
// node, then other stuff is happening here. |
838 |
// node, then other stuff is happening here. |
| 839 |
BasicBlock::iterator BBI = BI->getIterator(); |
839 |
BasicBlock::iterator BBI = BI->getIterator(); |
| 840 |
if (BBI != BB->begin()) { |
840 |
if (BBI != BB->begin()) { |
| 841 |
--BBI; |
841 |
--BBI; |
| 842 |
while (isa(BBI)) { |
842 |
while (isa(BBI)) { |
| 843 |
if (BBI == BB->begin()) |
843 |
if (BBI == BB->begin()) |
| 844 |
break; |
844 |
break; |
| 845 |
--BBI; |
845 |
--BBI; |
| 846 |
} |
846 |
} |
| 847 |
if (!isa(BBI) && !isa(BBI)) |
847 |
if (!isa(BBI) && !isa(BBI)) |
| 848 |
return nullptr; |
848 |
return nullptr; |
| 849 |
} |
849 |
} |
| 850 |
|
850 |
|
| 851 |
// Do not break infinite loops. |
851 |
// Do not break infinite loops. |
| 852 |
BasicBlock *DestBB = BI->getSuccessor(0); |
852 |
BasicBlock *DestBB = BI->getSuccessor(0); |
| 853 |
if (DestBB == BB) |
853 |
if (DestBB == BB) |
| 854 |
return nullptr; |
854 |
return nullptr; |
| 855 |
|
855 |
|
| 856 |
if (!canMergeBlocks(BB, DestBB)) |
856 |
if (!canMergeBlocks(BB, DestBB)) |
| 857 |
DestBB = nullptr; |
857 |
DestBB = nullptr; |
| 858 |
|
858 |
|
| 859 |
return DestBB; |
859 |
return DestBB; |
| 860 |
} |
860 |
} |
| 861 |
|
861 |
|
| 862 |
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an |
862 |
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an |
| 863 |
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split |
863 |
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split |
| 864 |
/// edges in ways that are non-optimal for isel. Start by eliminating these |
864 |
/// edges in ways that are non-optimal for isel. Start by eliminating these |
| 865 |
/// blocks so we can split them the way we want them. |
865 |
/// blocks so we can split them the way we want them. |
| 866 |
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { |
866 |
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { |
| 867 |
SmallPtrSet Preheaders; |
867 |
SmallPtrSet Preheaders; |
| 868 |
SmallVector LoopList(LI->begin(), LI->end()); |
868 |
SmallVector LoopList(LI->begin(), LI->end()); |
| 869 |
while (!LoopList.empty()) { |
869 |
while (!LoopList.empty()) { |
| 870 |
Loop *L = LoopList.pop_back_val(); |
870 |
Loop *L = LoopList.pop_back_val(); |
| 871 |
llvm::append_range(LoopList, *L); |
871 |
llvm::append_range(LoopList, *L); |
| 872 |
if (BasicBlock *Preheader = L->getLoopPreheader()) |
872 |
if (BasicBlock *Preheader = L->getLoopPreheader()) |
| 873 |
Preheaders.insert(Preheader); |
873 |
Preheaders.insert(Preheader); |
| 874 |
} |
874 |
} |
| 875 |
|
875 |
|
| 876 |
bool MadeChange = false; |
876 |
bool MadeChange = false; |
| 877 |
// Copy blocks into a temporary array to avoid iterator invalidation issues |
877 |
// Copy blocks into a temporary array to avoid iterator invalidation issues |
| 878 |
// as we remove them. |
878 |
// as we remove them. |
| 879 |
// Note that this intentionally skips the entry block. |
879 |
// Note that this intentionally skips the entry block. |
| 880 |
SmallVector Blocks; |
880 |
SmallVector Blocks; |
| 881 |
for (auto &Block : llvm::drop_begin(F)) |
881 |
for (auto &Block : llvm::drop_begin(F)) |
| 882 |
Blocks.push_back(&Block); |
882 |
Blocks.push_back(&Block); |
| 883 |
|
883 |
|
| 884 |
for (auto &Block : Blocks) { |
884 |
for (auto &Block : Blocks) { |
| 885 |
BasicBlock *BB = cast_or_null(Block); |
885 |
BasicBlock *BB = cast_or_null(Block); |
| 886 |
if (!BB) |
886 |
if (!BB) |
| 887 |
continue; |
887 |
continue; |
| 888 |
BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); |
888 |
BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); |
| 889 |
if (!DestBB || |
889 |
if (!DestBB || |
| 890 |
!isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) |
890 |
!isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) |
| 891 |
continue; |
891 |
continue; |
| 892 |
|
892 |
|
| 893 |
eliminateMostlyEmptyBlock(BB); |
893 |
eliminateMostlyEmptyBlock(BB); |
| 894 |
MadeChange = true; |
894 |
MadeChange = true; |
| 895 |
} |
895 |
} |
| 896 |
return MadeChange; |
896 |
return MadeChange; |
| 897 |
} |
897 |
} |
| 898 |
|
898 |
|
| 899 |
bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, |
899 |
bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, |
| 900 |
BasicBlock *DestBB, |
900 |
BasicBlock *DestBB, |
| 901 |
bool isPreheader) { |
901 |
bool isPreheader) { |
| 902 |
// Do not delete loop preheaders if doing so would create a critical edge. |
902 |
// Do not delete loop preheaders if doing so would create a critical edge. |
| 903 |
// Loop preheaders can be good locations to spill registers. If the |
903 |
// Loop preheaders can be good locations to spill registers. If the |
| 904 |
// preheader is deleted and we create a critical edge, registers may be |
904 |
// preheader is deleted and we create a critical edge, registers may be |
| 905 |
// spilled in the loop body instead. |
905 |
// spilled in the loop body instead. |
| 906 |
if (!DisablePreheaderProtect && isPreheader && |
906 |
if (!DisablePreheaderProtect && isPreheader && |
| 907 |
!(BB->getSinglePredecessor() && |
907 |
!(BB->getSinglePredecessor() && |
| 908 |
BB->getSinglePredecessor()->getSingleSuccessor())) |
908 |
BB->getSinglePredecessor()->getSingleSuccessor())) |
| 909 |
return false; |
909 |
return false; |
| 910 |
|
910 |
|
| 911 |
// Skip merging if the block's successor is also a successor to any callbr |
911 |
// Skip merging if the block's successor is also a successor to any callbr |
| 912 |
// that leads to this block. |
912 |
// that leads to this block. |
| 913 |
// FIXME: Is this really needed? Is this a correctness issue? |
913 |
// FIXME: Is this really needed? Is this a correctness issue? |
| 914 |
for (BasicBlock *Pred : predecessors(BB)) { |
914 |
for (BasicBlock *Pred : predecessors(BB)) { |
| 915 |
if (auto *CBI = dyn_cast((Pred)->getTerminator())) |
915 |
if (auto *CBI = dyn_cast((Pred)->getTerminator())) |
| 916 |
for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) |
916 |
for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i) |
| 917 |
if (DestBB == CBI->getSuccessor(i)) |
917 |
if (DestBB == CBI->getSuccessor(i)) |
| 918 |
return false; |
918 |
return false; |
| 919 |
} |
919 |
} |
| 920 |
|
920 |
|
| 921 |
// Try to skip merging if the unique predecessor of BB is terminated by a |
921 |
// Try to skip merging if the unique predecessor of BB is terminated by a |
| 922 |
// switch or indirect branch instruction, and BB is used as an incoming block |
922 |
// switch or indirect branch instruction, and BB is used as an incoming block |
| 923 |
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to |
923 |
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to |
| 924 |
// add COPY instructions in the predecessor of BB instead of BB (if it is not |
924 |
// add COPY instructions in the predecessor of BB instead of BB (if it is not |
| 925 |
// merged). Note that the critical edge created by merging such blocks wont be |
925 |
// merged). Note that the critical edge created by merging such blocks wont be |
| 926 |
// split in MachineSink because the jump table is not analyzable. By keeping |
926 |
// split in MachineSink because the jump table is not analyzable. By keeping |
| 927 |
// such empty block (BB), ISel will place COPY instructions in BB, not in the |
927 |
// such empty block (BB), ISel will place COPY instructions in BB, not in the |
| 928 |
// predecessor of BB. |
928 |
// predecessor of BB. |
| 929 |
BasicBlock *Pred = BB->getUniquePredecessor(); |
929 |
BasicBlock *Pred = BB->getUniquePredecessor(); |
| 930 |
if (!Pred || !(isa(Pred->getTerminator()) || |
930 |
if (!Pred || !(isa(Pred->getTerminator()) || |
| 931 |
isa(Pred->getTerminator()))) |
931 |
isa(Pred->getTerminator()))) |
| 932 |
return true; |
932 |
return true; |
| 933 |
|
933 |
|
| 934 |
if (BB->getTerminator() != BB->getFirstNonPHIOrDbg()) |
934 |
if (BB->getTerminator() != BB->getFirstNonPHIOrDbg()) |
| 935 |
return true; |
935 |
return true; |
| 936 |
|
936 |
|
| 937 |
// We use a simple cost heuristic which determine skipping merging is |
937 |
// We use a simple cost heuristic which determine skipping merging is |
| 938 |
// profitable if the cost of skipping merging is less than the cost of |
938 |
// profitable if the cost of skipping merging is less than the cost of |
| 939 |
// merging : Cost(skipping merging) < Cost(merging BB), where the |
939 |
// merging : Cost(skipping merging) < Cost(merging BB), where the |
| 940 |
// Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and |
940 |
// Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and |
| 941 |
// the Cost(merging BB) is Freq(Pred) * Cost(Copy). |
941 |
// the Cost(merging BB) is Freq(Pred) * Cost(Copy). |
| 942 |
// Assuming Cost(Copy) == Cost(Branch), we could simplify it to : |
942 |
// Assuming Cost(Copy) == Cost(Branch), we could simplify it to : |
| 943 |
// Freq(Pred) / Freq(BB) > 2. |
943 |
// Freq(Pred) / Freq(BB) > 2. |
| 944 |
// Note that if there are multiple empty blocks sharing the same incoming |
944 |
// Note that if there are multiple empty blocks sharing the same incoming |
| 945 |
// value for the PHIs in the DestBB, we consider them together. In such |
945 |
// value for the PHIs in the DestBB, we consider them together. In such |
| 946 |
// case, Cost(merging BB) will be the sum of their frequencies. |
946 |
// case, Cost(merging BB) will be the sum of their frequencies. |
| 947 |
|
947 |
|
| 948 |
if (!isa(DestBB->begin())) |
948 |
if (!isa(DestBB->begin())) |
| 949 |
return true; |
949 |
return true; |
| 950 |
|
950 |
|
| 951 |
SmallPtrSet SameIncomingValueBBs; |
951 |
SmallPtrSet SameIncomingValueBBs; |
| 952 |
|
952 |
|
| 953 |
// Find all other incoming blocks from which incoming values of all PHIs in |
953 |
// Find all other incoming blocks from which incoming values of all PHIs in |
| 954 |
// DestBB are the same as the ones from BB. |
954 |
// DestBB are the same as the ones from BB. |
| 955 |
for (BasicBlock *DestBBPred : predecessors(DestBB)) { |
955 |
for (BasicBlock *DestBBPred : predecessors(DestBB)) { |
| 956 |
if (DestBBPred == BB) |
956 |
if (DestBBPred == BB) |
| 957 |
continue; |
957 |
continue; |
| 958 |
|
958 |
|
| 959 |
if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) { |
959 |
if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) { |
| 960 |
return DestPN.getIncomingValueForBlock(BB) == |
960 |
return DestPN.getIncomingValueForBlock(BB) == |
| 961 |
DestPN.getIncomingValueForBlock(DestBBPred); |
961 |
DestPN.getIncomingValueForBlock(DestBBPred); |
| 962 |
})) |
962 |
})) |
| 963 |
SameIncomingValueBBs.insert(DestBBPred); |
963 |
SameIncomingValueBBs.insert(DestBBPred); |
| 964 |
} |
964 |
} |
| 965 |
|
965 |
|
| 966 |
// See if all BB's incoming values are same as the value from Pred. In this |
966 |
// See if all BB's incoming values are same as the value from Pred. In this |
| 967 |
// case, no reason to skip merging because COPYs are expected to be place in |
967 |
// case, no reason to skip merging because COPYs are expected to be place in |
| 968 |
// Pred already. |
968 |
// Pred already. |
| 969 |
if (SameIncomingValueBBs.count(Pred)) |
969 |
if (SameIncomingValueBBs.count(Pred)) |
| 970 |
return true; |
970 |
return true; |
| 971 |
|
971 |
|
| 972 |
BlockFrequency PredFreq = BFI->getBlockFreq(Pred); |
972 |
BlockFrequency PredFreq = BFI->getBlockFreq(Pred); |
| 973 |
BlockFrequency BBFreq = BFI->getBlockFreq(BB); |
973 |
BlockFrequency BBFreq = BFI->getBlockFreq(BB); |
| 974 |
|
974 |
|
| 975 |
for (auto *SameValueBB : SameIncomingValueBBs) |
975 |
for (auto *SameValueBB : SameIncomingValueBBs) |
| 976 |
if (SameValueBB->getUniquePredecessor() == Pred && |
976 |
if (SameValueBB->getUniquePredecessor() == Pred && |
| 977 |
DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) |
977 |
DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) |
| 978 |
BBFreq += BFI->getBlockFreq(SameValueBB); |
978 |
BBFreq += BFI->getBlockFreq(SameValueBB); |
| 979 |
|
979 |
|
| 980 |
return PredFreq.getFrequency() <= |
980 |
return PredFreq.getFrequency() <= |
| 981 |
BBFreq.getFrequency() * FreqRatioToSkipMerge; |
981 |
BBFreq.getFrequency() * FreqRatioToSkipMerge; |
| 982 |
} |
982 |
} |
| 983 |
|
983 |
|
| 984 |
/// Return true if we can merge BB into DestBB if there is a single |
984 |
/// Return true if we can merge BB into DestBB if there is a single |
| 985 |
/// unconditional branch between them, and BB contains no other non-phi |
985 |
/// unconditional branch between them, and BB contains no other non-phi |
| 986 |
/// instructions. |
986 |
/// instructions. |
| 987 |
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, |
987 |
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, |
| 988 |
const BasicBlock *DestBB) const { |
988 |
const BasicBlock *DestBB) const { |
| 989 |
// We only want to eliminate blocks whose phi nodes are used by phi nodes in |
989 |
// We only want to eliminate blocks whose phi nodes are used by phi nodes in |
| 990 |
// the successor. If there are more complex condition (e.g. preheaders), |
990 |
// the successor. If there are more complex condition (e.g. preheaders), |
| 991 |
// don't mess around with them. |
991 |
// don't mess around with them. |
| 992 |
for (const PHINode &PN : BB->phis()) { |
992 |
for (const PHINode &PN : BB->phis()) { |
| 993 |
for (const User *U : PN.users()) { |
993 |
for (const User *U : PN.users()) { |
| 994 |
const Instruction *UI = cast(U); |
994 |
const Instruction *UI = cast(U); |
| 995 |
if (UI->getParent() != DestBB || !isa(UI)) |
995 |
if (UI->getParent() != DestBB || !isa(UI)) |
| 996 |
return false; |
996 |
return false; |
| 997 |
// If User is inside DestBB block and it is a PHINode then check |
997 |
// If User is inside DestBB block and it is a PHINode then check |
| 998 |
// incoming value. If incoming value is not from BB then this is |
998 |
// incoming value. If incoming value is not from BB then this is |
| 999 |
// a complex condition (e.g. preheaders) we want to avoid here. |
999 |
// a complex condition (e.g. preheaders) we want to avoid here. |
| 1000 |
if (UI->getParent() == DestBB) { |
1000 |
if (UI->getParent() == DestBB) { |
| 1001 |
if (const PHINode *UPN = dyn_cast(UI)) |
1001 |
if (const PHINode *UPN = dyn_cast(UI)) |
| 1002 |
for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { |
1002 |
for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { |
| 1003 |
Instruction *Insn = dyn_cast(UPN->getIncomingValue(I)); |
1003 |
Instruction *Insn = dyn_cast(UPN->getIncomingValue(I)); |
| 1004 |
if (Insn && Insn->getParent() == BB && |
1004 |
if (Insn && Insn->getParent() == BB && |
| 1005 |
Insn->getParent() != UPN->getIncomingBlock(I)) |
1005 |
Insn->getParent() != UPN->getIncomingBlock(I)) |
| 1006 |
return false; |
1006 |
return false; |
| 1007 |
} |
1007 |
} |
| 1008 |
} |
1008 |
} |
| 1009 |
} |
1009 |
} |
| 1010 |
} |
1010 |
} |
| 1011 |
|
1011 |
|
| 1012 |
// If BB and DestBB contain any common predecessors, then the phi nodes in BB |
1012 |
// If BB and DestBB contain any common predecessors, then the phi nodes in BB |
| 1013 |
// and DestBB may have conflicting incoming values for the block. If so, we |
1013 |
// and DestBB may have conflicting incoming values for the block. If so, we |
| 1014 |
// can't merge the block. |
1014 |
// can't merge the block. |
| 1015 |
const PHINode *DestBBPN = dyn_cast(DestBB->begin()); |
1015 |
const PHINode *DestBBPN = dyn_cast(DestBB->begin()); |
| 1016 |
if (!DestBBPN) |
1016 |
if (!DestBBPN) |
| 1017 |
return true; // no conflict. |
1017 |
return true; // no conflict. |
| 1018 |
|
1018 |
|
| 1019 |
// Collect the preds of BB. |
1019 |
// Collect the preds of BB. |
| 1020 |
SmallPtrSet BBPreds; |
1020 |
SmallPtrSet BBPreds; |
| 1021 |
if (const PHINode *BBPN = dyn_cast(BB->begin())) { |
1021 |
if (const PHINode *BBPN = dyn_cast(BB->begin())) { |
| 1022 |
// It is faster to get preds from a PHI than with pred_iterator. |
1022 |
// It is faster to get preds from a PHI than with pred_iterator. |
| 1023 |
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) |
1023 |
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) |
| 1024 |
BBPreds.insert(BBPN->getIncomingBlock(i)); |
1024 |
BBPreds.insert(BBPN->getIncomingBlock(i)); |
| 1025 |
} else { |
1025 |
} else { |
| 1026 |
BBPreds.insert(pred_begin(BB), pred_end(BB)); |
1026 |
BBPreds.insert(pred_begin(BB), pred_end(BB)); |
| 1027 |
} |
1027 |
} |
| 1028 |
|
1028 |
|
| 1029 |
// Walk the preds of DestBB. |
1029 |
// Walk the preds of DestBB. |
| 1030 |
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { |
1030 |
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { |
| 1031 |
BasicBlock *Pred = DestBBPN->getIncomingBlock(i); |
1031 |
BasicBlock *Pred = DestBBPN->getIncomingBlock(i); |
| 1032 |
if (BBPreds.count(Pred)) { // Common predecessor? |
1032 |
if (BBPreds.count(Pred)) { // Common predecessor? |
| 1033 |
for (const PHINode &PN : DestBB->phis()) { |
1033 |
for (const PHINode &PN : DestBB->phis()) { |
| 1034 |
const Value *V1 = PN.getIncomingValueForBlock(Pred); |
1034 |
const Value *V1 = PN.getIncomingValueForBlock(Pred); |
| 1035 |
const Value *V2 = PN.getIncomingValueForBlock(BB); |
1035 |
const Value *V2 = PN.getIncomingValueForBlock(BB); |
| 1036 |
|
1036 |
|
| 1037 |
// If V2 is a phi node in BB, look up what the mapped value will be. |
1037 |
// If V2 is a phi node in BB, look up what the mapped value will be. |
| 1038 |
if (const PHINode *V2PN = dyn_cast(V2)) |
1038 |
if (const PHINode *V2PN = dyn_cast(V2)) |
| 1039 |
if (V2PN->getParent() == BB) |
1039 |
if (V2PN->getParent() == BB) |
| 1040 |
V2 = V2PN->getIncomingValueForBlock(Pred); |
1040 |
V2 = V2PN->getIncomingValueForBlock(Pred); |
| 1041 |
|
1041 |
|
| 1042 |
// If there is a conflict, bail out. |
1042 |
// If there is a conflict, bail out. |
| 1043 |
if (V1 != V2) |
1043 |
if (V1 != V2) |
| 1044 |
return false; |
1044 |
return false; |
| 1045 |
} |
1045 |
} |
| 1046 |
} |
1046 |
} |
| 1047 |
} |
1047 |
} |
| 1048 |
|
1048 |
|
| 1049 |
return true; |
1049 |
return true; |
| 1050 |
} |
1050 |
} |
| 1051 |
|
1051 |
|
| 1052 |
/// Replace all old uses with new ones, and push the updated BBs into FreshBBs. |
1052 |
/// Replace all old uses with new ones, and push the updated BBs into FreshBBs. |
| 1053 |
static void replaceAllUsesWith(Value *Old, Value *New, |
1053 |
static void replaceAllUsesWith(Value *Old, Value *New, |
| 1054 |
SmallSet &FreshBBs, |
1054 |
SmallSet &FreshBBs, |
| 1055 |
bool IsHuge) { |
1055 |
bool IsHuge) { |
| 1056 |
auto *OldI = dyn_cast(Old); |
1056 |
auto *OldI = dyn_cast(Old); |
| 1057 |
if (OldI) { |
1057 |
if (OldI) { |
| 1058 |
for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end(); |
1058 |
for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end(); |
| 1059 |
UI != E; ++UI) { |
1059 |
UI != E; ++UI) { |
| 1060 |
Instruction *User = cast(*UI); |
1060 |
Instruction *User = cast(*UI); |
| 1061 |
if (IsHuge) |
1061 |
if (IsHuge) |
| 1062 |
FreshBBs.insert(User->getParent()); |
1062 |
FreshBBs.insert(User->getParent()); |
| 1063 |
} |
1063 |
} |
| 1064 |
} |
1064 |
} |
| 1065 |
Old->replaceAllUsesWith(New); |
1065 |
Old->replaceAllUsesWith(New); |
| 1066 |
} |
1066 |
} |
| 1067 |
|
1067 |
|
| 1068 |
/// Eliminate a basic block that has only phi's and an unconditional branch in |
1068 |
/// Eliminate a basic block that has only phi's and an unconditional branch in |
| 1069 |
/// it. |
1069 |
/// it. |
| 1070 |
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { |
1070 |
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { |
| 1071 |
BranchInst *BI = cast(BB->getTerminator()); |
1071 |
BranchInst *BI = cast(BB->getTerminator()); |
| 1072 |
BasicBlock *DestBB = BI->getSuccessor(0); |
1072 |
BasicBlock *DestBB = BI->getSuccessor(0); |
| 1073 |
|
1073 |
|
| 1074 |
LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" |
1074 |
LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" |
| 1075 |
<< *BB << *DestBB); |
1075 |
<< *BB << *DestBB); |
| 1076 |
|
1076 |
|
| 1077 |
// If the destination block has a single pred, then this is a trivial edge, |
1077 |
// If the destination block has a single pred, then this is a trivial edge, |
| 1078 |
// just collapse it. |
1078 |
// just collapse it. |
| 1079 |
if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { |
1079 |
if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { |
| 1080 |
if (SinglePred != DestBB) { |
1080 |
if (SinglePred != DestBB) { |
| 1081 |
assert(SinglePred == BB && |
1081 |
assert(SinglePred == BB && |
| 1082 |
"Single predecessor not the same as predecessor"); |
1082 |
"Single predecessor not the same as predecessor"); |
| 1083 |
// Merge DestBB into SinglePred/BB and delete it. |
1083 |
// Merge DestBB into SinglePred/BB and delete it. |
| 1084 |
MergeBlockIntoPredecessor(DestBB); |
1084 |
MergeBlockIntoPredecessor(DestBB); |
| 1085 |
// Note: BB(=SinglePred) will not be deleted on this path. |
1085 |
// Note: BB(=SinglePred) will not be deleted on this path. |
| 1086 |
// DestBB(=its single successor) is the one that was deleted. |
1086 |
// DestBB(=its single successor) is the one that was deleted. |
| 1087 |
LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n"); |
1087 |
LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n"); |
| 1088 |
|
1088 |
|
| 1089 |
if (IsHugeFunc) { |
1089 |
if (IsHugeFunc) { |
| 1090 |
// Update FreshBBs to optimize the merged BB. |
1090 |
// Update FreshBBs to optimize the merged BB. |
| 1091 |
FreshBBs.insert(SinglePred); |
1091 |
FreshBBs.insert(SinglePred); |
| 1092 |
FreshBBs.erase(DestBB); |
1092 |
FreshBBs.erase(DestBB); |
| 1093 |
} |
1093 |
} |
| 1094 |
return; |
1094 |
return; |
| 1095 |
} |
1095 |
} |
| 1096 |
} |
1096 |
} |
| 1097 |
|
1097 |
|
| 1098 |
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB |
1098 |
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB |
| 1099 |
// to handle the new incoming edges it is about to have. |
1099 |
// to handle the new incoming edges it is about to have. |
| 1100 |
for (PHINode &PN : DestBB->phis()) { |
1100 |
for (PHINode &PN : DestBB->phis()) { |
| 1101 |
// Remove the incoming value for BB, and remember it. |
1101 |
// Remove the incoming value for BB, and remember it. |
| 1102 |
Value *InVal = PN.removeIncomingValue(BB, false); |
1102 |
Value *InVal = PN.removeIncomingValue(BB, false); |
| 1103 |
|
1103 |
|
| 1104 |
// Two options: either the InVal is a phi node defined in BB or it is some |
1104 |
// Two options: either the InVal is a phi node defined in BB or it is some |
| 1105 |
// value that dominates BB. |
1105 |
// value that dominates BB. |
| 1106 |
PHINode *InValPhi = dyn_cast(InVal); |
1106 |
PHINode *InValPhi = dyn_cast(InVal); |
| 1107 |
if (InValPhi && InValPhi->getParent() == BB) { |
1107 |
if (InValPhi && InValPhi->getParent() == BB) { |
| 1108 |
// Add all of the input values of the input PHI as inputs of this phi. |
1108 |
// Add all of the input values of the input PHI as inputs of this phi. |
| 1109 |
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) |
1109 |
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) |
| 1110 |
PN.addIncoming(InValPhi->getIncomingValue(i), |
1110 |
PN.addIncoming(InValPhi->getIncomingValue(i), |
| 1111 |
InValPhi->getIncomingBlock(i)); |
1111 |
InValPhi->getIncomingBlock(i)); |
| 1112 |
} else { |
1112 |
} else { |
| 1113 |
// Otherwise, add one instance of the dominating value for each edge that |
1113 |
// Otherwise, add one instance of the dominating value for each edge that |
| 1114 |
// we will be adding. |
1114 |
// we will be adding. |
| 1115 |
if (PHINode *BBPN = dyn_cast(BB->begin())) { |
1115 |
if (PHINode *BBPN = dyn_cast(BB->begin())) { |
| 1116 |
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) |
1116 |
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) |
| 1117 |
PN.addIncoming(InVal, BBPN->getIncomingBlock(i)); |
1117 |
PN.addIncoming(InVal, BBPN->getIncomingBlock(i)); |
| 1118 |
} else { |
1118 |
} else { |
| 1119 |
for (BasicBlock *Pred : predecessors(BB)) |
1119 |
for (BasicBlock *Pred : predecessors(BB)) |
| 1120 |
PN.addIncoming(InVal, Pred); |
1120 |
PN.addIncoming(InVal, Pred); |
| 1121 |
} |
1121 |
} |
| 1122 |
} |
1122 |
} |
| 1123 |
} |
1123 |
} |
| 1124 |
|
1124 |
|
| 1125 |
// The PHIs are now updated, change everything that refers to BB to use |
1125 |
// The PHIs are now updated, change everything that refers to BB to use |
| 1126 |
// DestBB and remove BB. |
1126 |
// DestBB and remove BB. |
| 1127 |
BB->replaceAllUsesWith(DestBB); |
1127 |
BB->replaceAllUsesWith(DestBB); |
| 1128 |
BB->eraseFromParent(); |
1128 |
BB->eraseFromParent(); |
| 1129 |
++NumBlocksElim; |
1129 |
++NumBlocksElim; |
| 1130 |
|
1130 |
|
| 1131 |
LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); |
1131 |
LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); |
| 1132 |
} |
1132 |
} |
| 1133 |
|
1133 |
|
| 1134 |
// Computes a map of base pointer relocation instructions to corresponding |
1134 |
// Computes a map of base pointer relocation instructions to corresponding |
| 1135 |
// derived pointer relocation instructions given a vector of all relocate calls |
1135 |
// derived pointer relocation instructions given a vector of all relocate calls |
| 1136 |
static void computeBaseDerivedRelocateMap( |
1136 |
static void computeBaseDerivedRelocateMap( |
| 1137 |
const SmallVectorImpl &AllRelocateCalls, |
1137 |
const SmallVectorImpl &AllRelocateCalls, |
| 1138 |
DenseMap> |
1138 |
DenseMap> |
| 1139 |
&RelocateInstMap) { |
1139 |
&RelocateInstMap) { |
| 1140 |
// Collect information in two maps: one primarily for locating the base object |
1140 |
// Collect information in two maps: one primarily for locating the base object |
| 1141 |
// while filling the second map; the second map is the final structure holding |
1141 |
// while filling the second map; the second map is the final structure holding |
| 1142 |
// a mapping between Base and corresponding Derived relocate calls |
1142 |
// a mapping between Base and corresponding Derived relocate calls |
| 1143 |
DenseMap, GCRelocateInst *> RelocateIdxMap; |
1143 |
DenseMap, GCRelocateInst *> RelocateIdxMap; |
| 1144 |
for (auto *ThisRelocate : AllRelocateCalls) { |
1144 |
for (auto *ThisRelocate : AllRelocateCalls) { |
| 1145 |
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), |
1145 |
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), |
| 1146 |
ThisRelocate->getDerivedPtrIndex()); |
1146 |
ThisRelocate->getDerivedPtrIndex()); |
| 1147 |
RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); |
1147 |
RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); |
| 1148 |
} |
1148 |
} |
| 1149 |
for (auto &Item : RelocateIdxMap) { |
1149 |
for (auto &Item : RelocateIdxMap) { |
| 1150 |
std::pair Key = Item.first; |
1150 |
std::pair Key = Item.first; |
| 1151 |
if (Key.first == Key.second) |
1151 |
if (Key.first == Key.second) |
| 1152 |
// Base relocation: nothing to insert |
1152 |
// Base relocation: nothing to insert |
| 1153 |
continue; |
1153 |
continue; |
| 1154 |
|
1154 |
|
| 1155 |
GCRelocateInst *I = Item.second; |
1155 |
GCRelocateInst *I = Item.second; |
| 1156 |
auto BaseKey = std::make_pair(Key.first, Key.first); |
1156 |
auto BaseKey = std::make_pair(Key.first, Key.first); |
| 1157 |
|
1157 |
|
| 1158 |
// We're iterating over RelocateIdxMap so we cannot modify it. |
1158 |
// We're iterating over RelocateIdxMap so we cannot modify it. |
| 1159 |
auto MaybeBase = RelocateIdxMap.find(BaseKey); |
1159 |
auto MaybeBase = RelocateIdxMap.find(BaseKey); |
| 1160 |
if (MaybeBase == RelocateIdxMap.end()) |
1160 |
if (MaybeBase == RelocateIdxMap.end()) |
| 1161 |
// TODO: We might want to insert a new base object relocate and gep off |
1161 |
// TODO: We might want to insert a new base object relocate and gep off |
| 1162 |
// that, if there are enough derived object relocates. |
1162 |
// that, if there are enough derived object relocates. |
| 1163 |
continue; |
1163 |
continue; |
| 1164 |
|
1164 |
|
| 1165 |
RelocateInstMap[MaybeBase->second].push_back(I); |
1165 |
RelocateInstMap[MaybeBase->second].push_back(I); |
| 1166 |
} |
1166 |
} |
| 1167 |
} |
1167 |
} |
| 1168 |
|
1168 |
|
| 1169 |
// Accepts a GEP and extracts the operands into a vector provided they're all |
1169 |
// Accepts a GEP and extracts the operands into a vector provided they're all |
| 1170 |
// small integer constants |
1170 |
// small integer constants |
| 1171 |
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, |
1171 |
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, |
| 1172 |
SmallVectorImpl &OffsetV) { |
1172 |
SmallVectorImpl &OffsetV) { |
| 1173 |
for (unsigned i = 1; i < GEP->getNumOperands(); i++) { |
1173 |
for (unsigned i = 1; i < GEP->getNumOperands(); i++) { |
| 1174 |
// Only accept small constant integer operands |
1174 |
// Only accept small constant integer operands |
| 1175 |
auto *Op = dyn_cast(GEP->getOperand(i)); |
1175 |
auto *Op = dyn_cast(GEP->getOperand(i)); |
| 1176 |
if (!Op || Op->getZExtValue() > 20) |
1176 |
if (!Op || Op->getZExtValue() > 20) |
| 1177 |
return false; |
1177 |
return false; |
| 1178 |
} |
1178 |
} |
| 1179 |
|
1179 |
|
| 1180 |
for (unsigned i = 1; i < GEP->getNumOperands(); i++) |
1180 |
for (unsigned i = 1; i < GEP->getNumOperands(); i++) |
| 1181 |
OffsetV.push_back(GEP->getOperand(i)); |
1181 |
OffsetV.push_back(GEP->getOperand(i)); |
| 1182 |
return true; |
1182 |
return true; |
| 1183 |
} |
1183 |
} |
| 1184 |
|
1184 |
|
| 1185 |
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to |
1185 |
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to |
| 1186 |
// replace, computes a replacement, and affects it. |
1186 |
// replace, computes a replacement, and affects it. |
| 1187 |
static bool |
1187 |
static bool |
| 1188 |
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, |
1188 |
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, |
| 1189 |
const SmallVectorImpl &Targets) { |
1189 |
const SmallVectorImpl &Targets) { |
| 1190 |
bool MadeChange = false; |
1190 |
bool MadeChange = false; |
| 1191 |
// We must ensure the relocation of derived pointer is defined after |
1191 |
// We must ensure the relocation of derived pointer is defined after |
| 1192 |
// relocation of base pointer. If we find a relocation corresponding to base |
1192 |
// relocation of base pointer. If we find a relocation corresponding to base |
| 1193 |
// defined earlier than relocation of base then we move relocation of base |
1193 |
// defined earlier than relocation of base then we move relocation of base |
| 1194 |
// right before found relocation. We consider only relocation in the same |
1194 |
// right before found relocation. We consider only relocation in the same |
| 1195 |
// basic block as relocation of base. Relocations from other basic block will |
1195 |
// basic block as relocation of base. Relocations from other basic block will |
| 1196 |
// be skipped by optimization and we do not care about them. |
1196 |
// be skipped by optimization and we do not care about them. |
| 1197 |
for (auto R = RelocatedBase->getParent()->getFirstInsertionPt(); |
1197 |
for (auto R = RelocatedBase->getParent()->getFirstInsertionPt(); |
| 1198 |
&*R != RelocatedBase; ++R) |
1198 |
&*R != RelocatedBase; ++R) |
| 1199 |
if (auto *RI = dyn_cast(R)) |
1199 |
if (auto *RI = dyn_cast(R)) |
| 1200 |
if (RI->getStatepoint() == RelocatedBase->getStatepoint()) |
1200 |
if (RI->getStatepoint() == RelocatedBase->getStatepoint()) |
| 1201 |
if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) { |
1201 |
if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) { |
| 1202 |
RelocatedBase->moveBefore(RI); |
1202 |
RelocatedBase->moveBefore(RI); |
| 1203 |
break; |
1203 |
break; |
| 1204 |
} |
1204 |
} |
| 1205 |
|
1205 |
|
| 1206 |
for (GCRelocateInst *ToReplace : Targets) { |
1206 |
for (GCRelocateInst *ToReplace : Targets) { |
| 1207 |
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && |
1207 |
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && |
| 1208 |
"Not relocating a derived object of the original base object"); |
1208 |
"Not relocating a derived object of the original base object"); |
| 1209 |
if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { |
1209 |
if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { |
| 1210 |
// A duplicate relocate call. TODO: coalesce duplicates. |
1210 |
// A duplicate relocate call. TODO: coalesce duplicates. |
| 1211 |
continue; |
1211 |
continue; |
| 1212 |
} |
1212 |
} |
| 1213 |
|
1213 |
|
| 1214 |
if (RelocatedBase->getParent() != ToReplace->getParent()) { |
1214 |
if (RelocatedBase->getParent() != ToReplace->getParent()) { |
| 1215 |
// Base and derived relocates are in different basic blocks. |
1215 |
// Base and derived relocates are in different basic blocks. |
| 1216 |
// In this case transform is only valid when base dominates derived |
1216 |
// In this case transform is only valid when base dominates derived |
| 1217 |
// relocate. However it would be too expensive to check dominance |
1217 |
// relocate. However it would be too expensive to check dominance |
| 1218 |
// for each such relocate, so we skip the whole transformation. |
1218 |
// for each such relocate, so we skip the whole transformation. |
| 1219 |
continue; |
1219 |
continue; |
| 1220 |
} |
1220 |
} |
| 1221 |
|
1221 |
|
| 1222 |
Value *Base = ToReplace->getBasePtr(); |
1222 |
Value *Base = ToReplace->getBasePtr(); |
| 1223 |
auto *Derived = dyn_cast(ToReplace->getDerivedPtr()); |
1223 |
auto *Derived = dyn_cast(ToReplace->getDerivedPtr()); |
| 1224 |
if (!Derived || Derived->getPointerOperand() != Base) |
1224 |
if (!Derived || Derived->getPointerOperand() != Base) |
| 1225 |
continue; |
1225 |
continue; |
| 1226 |
|
1226 |
|
| 1227 |
SmallVector OffsetV; |
1227 |
SmallVector OffsetV; |
| 1228 |
if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) |
1228 |
if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) |
| 1229 |
continue; |
1229 |
continue; |
| 1230 |
|
1230 |
|
| 1231 |
// Create a Builder and replace the target callsite with a gep |
1231 |
// Create a Builder and replace the target callsite with a gep |
| 1232 |
assert(RelocatedBase->getNextNode() && |
1232 |
assert(RelocatedBase->getNextNode() && |
| 1233 |
"Should always have one since it's not a terminator"); |
1233 |
"Should always have one since it's not a terminator"); |
| 1234 |
|
1234 |
|
| 1235 |
// Insert after RelocatedBase |
1235 |
// Insert after RelocatedBase |
| 1236 |
IRBuilder<> Builder(RelocatedBase->getNextNode()); |
1236 |
IRBuilder<> Builder(RelocatedBase->getNextNode()); |
| 1237 |
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); |
1237 |
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); |
| 1238 |
|
1238 |
|
| 1239 |
// If gc_relocate does not match the actual type, cast it to the right type. |
1239 |
// If gc_relocate does not match the actual type, cast it to the right type. |
| 1240 |
// In theory, there must be a bitcast after gc_relocate if the type does not |
1240 |
// In theory, there must be a bitcast after gc_relocate if the type does not |
| 1241 |
// match, and we should reuse it to get the derived pointer. But it could be |
1241 |
// match, and we should reuse it to get the derived pointer. But it could be |
| 1242 |
// cases like this: |
1242 |
// cases like this: |
| 1243 |
// bb1: |
1243 |
// bb1: |
| 1244 |
// ... |
1244 |
// ... |
| 1245 |
// %g1 = call coldcc i8 addrspace(1)* |
1245 |
// %g1 = call coldcc i8 addrspace(1)* |
| 1246 |
// @llvm.experimental.gc.relocate.p1i8(...) br label %merge |
1246 |
// @llvm.experimental.gc.relocate.p1i8(...) br label %merge |
| 1247 |
// |
1247 |
// |
| 1248 |
// bb2: |
1248 |
// bb2: |
| 1249 |
// ... |
1249 |
// ... |
| 1250 |
// %g2 = call coldcc i8 addrspace(1)* |
1250 |
// %g2 = call coldcc i8 addrspace(1)* |
| 1251 |
// @llvm.experimental.gc.relocate.p1i8(...) br label %merge |
1251 |
// @llvm.experimental.gc.relocate.p1i8(...) br label %merge |
| 1252 |
// |
1252 |
// |
| 1253 |
// merge: |
1253 |
// merge: |
| 1254 |
// %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] |
1254 |
// %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] |
| 1255 |
// %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* |
1255 |
// %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* |
| 1256 |
// |
1256 |
// |
| 1257 |
// In this case, we can not find the bitcast any more. So we insert a new |
1257 |
// In this case, we can not find the bitcast any more. So we insert a new |
| 1258 |
// bitcast no matter there is already one or not. In this way, we can handle |
1258 |
// bitcast no matter there is already one or not. In this way, we can handle |
| 1259 |
// all cases, and the extra bitcast should be optimized away in later |
1259 |
// all cases, and the extra bitcast should be optimized away in later |
| 1260 |
// passes. |
1260 |
// passes. |
| 1261 |
Value *ActualRelocatedBase = RelocatedBase; |
1261 |
Value *ActualRelocatedBase = RelocatedBase; |
| 1262 |
if (RelocatedBase->getType() != Base->getType()) { |
1262 |
if (RelocatedBase->getType() != Base->getType()) { |
| 1263 |
ActualRelocatedBase = |
1263 |
ActualRelocatedBase = |
| 1264 |
Builder.CreateBitCast(RelocatedBase, Base->getType()); |
1264 |
Builder.CreateBitCast(RelocatedBase, Base->getType()); |
| 1265 |
} |
1265 |
} |
| 1266 |
Value *Replacement = |
1266 |
Value *Replacement = |
| 1267 |
Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase, |
1267 |
Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase, |
| 1268 |
ArrayRef(OffsetV)); |
1268 |
ArrayRef(OffsetV)); |
| 1269 |
Replacement->takeName(ToReplace); |
1269 |
Replacement->takeName(ToReplace); |
| 1270 |
// If the newly generated derived pointer's type does not match the original |
1270 |
// If the newly generated derived pointer's type does not match the original |
| 1271 |
// derived pointer's type, cast the new derived pointer to match it. Same |
1271 |
// derived pointer's type, cast the new derived pointer to match it. Same |
| 1272 |
// reasoning as above. |
1272 |
// reasoning as above. |
| 1273 |
Value *ActualReplacement = Replacement; |
1273 |
Value *ActualReplacement = Replacement; |
| 1274 |
if (Replacement->getType() != ToReplace->getType()) { |
1274 |
if (Replacement->getType() != ToReplace->getType()) { |
| 1275 |
ActualReplacement = |
1275 |
ActualReplacement = |
| 1276 |
Builder.CreateBitCast(Replacement, ToReplace->getType()); |
1276 |
Builder.CreateBitCast(Replacement, ToReplace->getType()); |
| 1277 |
} |
1277 |
} |
| 1278 |
ToReplace->replaceAllUsesWith(ActualReplacement); |
1278 |
ToReplace->replaceAllUsesWith(ActualReplacement); |
| 1279 |
ToReplace->eraseFromParent(); |
1279 |
ToReplace->eraseFromParent(); |
| 1280 |
|
1280 |
|
| 1281 |
MadeChange = true; |
1281 |
MadeChange = true; |
| 1282 |
} |
1282 |
} |
| 1283 |
return MadeChange; |
1283 |
return MadeChange; |
| 1284 |
} |
1284 |
} |
| 1285 |
|
1285 |
|
| 1286 |
// Turns this: |
1286 |
// Turns this: |
| 1287 |
// |
1287 |
// |
| 1288 |
// %base = ... |
1288 |
// %base = ... |
| 1289 |
// %ptr = gep %base + 15 |
1289 |
// %ptr = gep %base + 15 |
| 1290 |
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) |
1290 |
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) |
| 1291 |
// %base' = relocate(%tok, i32 4, i32 4) |
1291 |
// %base' = relocate(%tok, i32 4, i32 4) |
| 1292 |
// %ptr' = relocate(%tok, i32 4, i32 5) |
1292 |
// %ptr' = relocate(%tok, i32 4, i32 5) |
| 1293 |
// %val = load %ptr' |
1293 |
// %val = load %ptr' |
| 1294 |
// |
1294 |
// |
| 1295 |
// into this: |
1295 |
// into this: |
| 1296 |
// |
1296 |
// |
| 1297 |
// %base = ... |
1297 |
// %base = ... |
| 1298 |
// %ptr = gep %base + 15 |
1298 |
// %ptr = gep %base + 15 |
| 1299 |
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) |
1299 |
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) |
| 1300 |
// %base' = gc.relocate(%tok, i32 4, i32 4) |
1300 |
// %base' = gc.relocate(%tok, i32 4, i32 4) |
| 1301 |
// %ptr' = gep %base' + 15 |
1301 |
// %ptr' = gep %base' + 15 |
| 1302 |
// %val = load %ptr' |
1302 |
// %val = load %ptr' |
| 1303 |
bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) { |
1303 |
bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) { |
| 1304 |
bool MadeChange = false; |
1304 |
bool MadeChange = false; |
| 1305 |
SmallVector AllRelocateCalls; |
1305 |
SmallVector AllRelocateCalls; |
| 1306 |
for (auto *U : I.users()) |
1306 |
for (auto *U : I.users()) |
| 1307 |
if (GCRelocateInst *Relocate = dyn_cast(U)) |
1307 |
if (GCRelocateInst *Relocate = dyn_cast(U)) |
| 1308 |
// Collect all the relocate calls associated with a statepoint |
1308 |
// Collect all the relocate calls associated with a statepoint |
| 1309 |
AllRelocateCalls.push_back(Relocate); |
1309 |
AllRelocateCalls.push_back(Relocate); |
| 1310 |
|
1310 |
|
| 1311 |
// We need at least one base pointer relocation + one derived pointer |
1311 |
// We need at least one base pointer relocation + one derived pointer |
| 1312 |
// relocation to mangle |
1312 |
// relocation to mangle |
| 1313 |
if (AllRelocateCalls.size() < 2) |
1313 |
if (AllRelocateCalls.size() < 2) |
| 1314 |
return false; |
1314 |
return false; |
| 1315 |
|
1315 |
|
| 1316 |
// RelocateInstMap is a mapping from the base relocate instruction to the |
1316 |
// RelocateInstMap is a mapping from the base relocate instruction to the |
| 1317 |
// corresponding derived relocate instructions |
1317 |
// corresponding derived relocate instructions |
| 1318 |
DenseMap> RelocateInstMap; |
1318 |
DenseMap> RelocateInstMap; |
| 1319 |
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); |
1319 |
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); |
| 1320 |
if (RelocateInstMap.empty()) |
1320 |
if (RelocateInstMap.empty()) |
| 1321 |
return false; |
1321 |
return false; |
| 1322 |
|
1322 |
|
| 1323 |
for (auto &Item : RelocateInstMap) |
1323 |
for (auto &Item : RelocateInstMap) |
| 1324 |
// Item.first is the RelocatedBase to offset against |
1324 |
// Item.first is the RelocatedBase to offset against |
| 1325 |
// Item.second is the vector of Targets to replace |
1325 |
// Item.second is the vector of Targets to replace |
| 1326 |
MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); |
1326 |
MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); |
| 1327 |
return MadeChange; |
1327 |
return MadeChange; |
| 1328 |
} |
1328 |
} |
| 1329 |
|
1329 |
|
| 1330 |
/// Sink the specified cast instruction into its user blocks. |
1330 |
/// Sink the specified cast instruction into its user blocks. |
| 1331 |
static bool SinkCast(CastInst *CI) { |
1331 |
static bool SinkCast(CastInst *CI) { |
| 1332 |
BasicBlock *DefBB = CI->getParent(); |
1332 |
BasicBlock *DefBB = CI->getParent(); |
| 1333 |
|
1333 |
|
| 1334 |
/// InsertedCasts - Only insert a cast in each block once. |
1334 |
/// InsertedCasts - Only insert a cast in each block once. |
| 1335 |
DenseMap InsertedCasts; |
1335 |
DenseMap InsertedCasts; |
| 1336 |
|
1336 |
|
| 1337 |
bool MadeChange = false; |
1337 |
bool MadeChange = false; |
| 1338 |
for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); |
1338 |
for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); |
| 1339 |
UI != E;) { |
1339 |
UI != E;) { |
| 1340 |
Use &TheUse = UI.getUse(); |
1340 |
Use &TheUse = UI.getUse(); |
| 1341 |
Instruction *User = cast(*UI); |
1341 |
Instruction *User = cast(*UI); |
| 1342 |
|
1342 |
|
| 1343 |
// Figure out which BB this cast is used in. For PHI's this is the |
1343 |
// Figure out which BB this cast is used in. For PHI's this is the |
| 1344 |
// appropriate predecessor block. |
1344 |
// appropriate predecessor block. |
| 1345 |
BasicBlock *UserBB = User->getParent(); |
1345 |
BasicBlock *UserBB = User->getParent(); |
| 1346 |
if (PHINode *PN = dyn_cast(User)) { |
1346 |
if (PHINode *PN = dyn_cast(User)) { |
| 1347 |
UserBB = PN->getIncomingBlock(TheUse); |
1347 |
UserBB = PN->getIncomingBlock(TheUse); |
| 1348 |
} |
1348 |
} |
| 1349 |
|
1349 |
|
| 1350 |
// Preincrement use iterator so we don't invalidate it. |
1350 |
// Preincrement use iterator so we don't invalidate it. |
| 1351 |
++UI; |
1351 |
++UI; |
| 1352 |
|
1352 |
|
| 1353 |
// The first insertion point of a block containing an EH pad is after the |
1353 |
// The first insertion point of a block containing an EH pad is after the |
| 1354 |
// pad. If the pad is the user, we cannot sink the cast past the pad. |
1354 |
// pad. If the pad is the user, we cannot sink the cast past the pad. |
| 1355 |
if (User->isEHPad()) |
1355 |
if (User->isEHPad()) |
| 1356 |
continue; |
1356 |
continue; |
| 1357 |
|
1357 |
|
| 1358 |
// If the block selected to receive the cast is an EH pad that does not |
1358 |
// If the block selected to receive the cast is an EH pad that does not |
| 1359 |
// allow non-PHI instructions before the terminator, we can't sink the |
1359 |
// allow non-PHI instructions before the terminator, we can't sink the |
| 1360 |
// cast. |
1360 |
// cast. |
| 1361 |
if (UserBB->getTerminator()->isEHPad()) |
1361 |
if (UserBB->getTerminator()->isEHPad()) |
| 1362 |
continue; |
1362 |
continue; |
| 1363 |
|
1363 |
|
| 1364 |
// If this user is in the same block as the cast, don't change the cast. |
1364 |
// If this user is in the same block as the cast, don't change the cast. |
| 1365 |
if (UserBB == DefBB) |
1365 |
if (UserBB == DefBB) |
| 1366 |
continue; |
1366 |
continue; |
| 1367 |
|
1367 |
|
| 1368 |
// If we have already inserted a cast into this block, use it. |
1368 |
// If we have already inserted a cast into this block, use it. |
| 1369 |
CastInst *&InsertedCast = InsertedCasts[UserBB]; |
1369 |
CastInst *&InsertedCast = InsertedCasts[UserBB]; |
| 1370 |
|
1370 |
|
| 1371 |
if (!InsertedCast) { |
1371 |
if (!InsertedCast) { |
| 1372 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
1372 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
| 1373 |
assert(InsertPt != UserBB->end()); |
1373 |
assert(InsertPt != UserBB->end()); |
| 1374 |
InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), |
1374 |
InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), |
| 1375 |
CI->getType(), "", &*InsertPt); |
1375 |
CI->getType(), "", &*InsertPt); |
| 1376 |
InsertedCast->setDebugLoc(CI->getDebugLoc()); |
1376 |
InsertedCast->setDebugLoc(CI->getDebugLoc()); |
| 1377 |
} |
1377 |
} |
| 1378 |
|
1378 |
|
| 1379 |
// Replace a use of the cast with a use of the new cast. |
1379 |
// Replace a use of the cast with a use of the new cast. |
| 1380 |
TheUse = InsertedCast; |
1380 |
TheUse = InsertedCast; |
| 1381 |
MadeChange = true; |
1381 |
MadeChange = true; |
| 1382 |
++NumCastUses; |
1382 |
++NumCastUses; |
| 1383 |
} |
1383 |
} |
| 1384 |
|
1384 |
|
| 1385 |
// If we removed all uses, nuke the cast. |
1385 |
// If we removed all uses, nuke the cast. |
| 1386 |
if (CI->use_empty()) { |
1386 |
if (CI->use_empty()) { |
| 1387 |
salvageDebugInfo(*CI); |
1387 |
salvageDebugInfo(*CI); |
| 1388 |
CI->eraseFromParent(); |
1388 |
CI->eraseFromParent(); |
| 1389 |
MadeChange = true; |
1389 |
MadeChange = true; |
| 1390 |
} |
1390 |
} |
| 1391 |
|
1391 |
|
| 1392 |
return MadeChange; |
1392 |
return MadeChange; |
| 1393 |
} |
1393 |
} |
| 1394 |
|
1394 |
|
| 1395 |
/// If the specified cast instruction is a noop copy (e.g. it's casting from |
1395 |
/// If the specified cast instruction is a noop copy (e.g. it's casting from |
| 1396 |
/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to |
1396 |
/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to |
| 1397 |
/// reduce the number of virtual registers that must be created and coalesced. |
1397 |
/// reduce the number of virtual registers that must be created and coalesced. |
| 1398 |
/// |
1398 |
/// |
| 1399 |
/// Return true if any changes are made. |
1399 |
/// Return true if any changes are made. |
| 1400 |
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, |
1400 |
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, |
| 1401 |
const DataLayout &DL) { |
1401 |
const DataLayout &DL) { |
| 1402 |
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition |
1402 |
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition |
| 1403 |
// than sinking only nop casts, but is helpful on some platforms. |
1403 |
// than sinking only nop casts, but is helpful on some platforms. |
| 1404 |
if (auto *ASC = dyn_cast(CI)) { |
1404 |
if (auto *ASC = dyn_cast(CI)) { |
| 1405 |
if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(), |
1405 |
if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(), |
| 1406 |
ASC->getDestAddressSpace())) |
1406 |
ASC->getDestAddressSpace())) |
| 1407 |
return false; |
1407 |
return false; |
| 1408 |
} |
1408 |
} |
| 1409 |
|
1409 |
|
| 1410 |
// If this is a noop copy, |
1410 |
// If this is a noop copy, |
| 1411 |
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); |
1411 |
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); |
| 1412 |
EVT DstVT = TLI.getValueType(DL, CI->getType()); |
1412 |
EVT DstVT = TLI.getValueType(DL, CI->getType()); |
| 1413 |
|
1413 |
|
| 1414 |
// This is an fp<->int conversion? |
1414 |
// This is an fp<->int conversion? |
| 1415 |
if (SrcVT.isInteger() != DstVT.isInteger()) |
1415 |
if (SrcVT.isInteger() != DstVT.isInteger()) |
| 1416 |
return false; |
1416 |
return false; |
| 1417 |
|
1417 |
|
| 1418 |
// If this is an extension, it will be a zero or sign extension, which |
1418 |
// If this is an extension, it will be a zero or sign extension, which |
| 1419 |
// isn't a noop. |
1419 |
// isn't a noop. |
| 1420 |
if (SrcVT.bitsLT(DstVT)) |
1420 |
if (SrcVT.bitsLT(DstVT)) |
| 1421 |
return false; |
1421 |
return false; |
| 1422 |
|
1422 |
|
| 1423 |
// If these values will be promoted, find out what they will be promoted |
1423 |
// If these values will be promoted, find out what they will be promoted |
| 1424 |
// to. This helps us consider truncates on PPC as noop copies when they |
1424 |
// to. This helps us consider truncates on PPC as noop copies when they |
| 1425 |
// are. |
1425 |
// are. |
| 1426 |
if (TLI.getTypeAction(CI->getContext(), SrcVT) == |
1426 |
if (TLI.getTypeAction(CI->getContext(), SrcVT) == |
| 1427 |
TargetLowering::TypePromoteInteger) |
1427 |
TargetLowering::TypePromoteInteger) |
| 1428 |
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); |
1428 |
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); |
| 1429 |
if (TLI.getTypeAction(CI->getContext(), DstVT) == |
1429 |
if (TLI.getTypeAction(CI->getContext(), DstVT) == |
| 1430 |
TargetLowering::TypePromoteInteger) |
1430 |
TargetLowering::TypePromoteInteger) |
| 1431 |
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); |
1431 |
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); |
| 1432 |
|
1432 |
|
| 1433 |
// If, after promotion, these are the same types, this is a noop copy. |
1433 |
// If, after promotion, these are the same types, this is a noop copy. |
| 1434 |
if (SrcVT != DstVT) |
1434 |
if (SrcVT != DstVT) |
| 1435 |
return false; |
1435 |
return false; |
| 1436 |
|
1436 |
|
| 1437 |
return SinkCast(CI); |
1437 |
return SinkCast(CI); |
| 1438 |
} |
1438 |
} |
| 1439 |
|
1439 |
|
| 1440 |
// Match a simple increment by constant operation. Note that if a sub is |
1440 |
// Match a simple increment by constant operation. Note that if a sub is |
| 1441 |
// matched, the step is negated (as if the step had been canonicalized to |
1441 |
// matched, the step is negated (as if the step had been canonicalized to |
| 1442 |
// an add, even though we leave the instruction alone.) |
1442 |
// an add, even though we leave the instruction alone.) |
| 1443 |
bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, |
1443 |
bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, |
| 1444 |
Constant *&Step) { |
1444 |
Constant *&Step) { |
| 1445 |
if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) || |
1445 |
if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) || |
| 1446 |
match(IVInc, m_ExtractValue<0>(m_Intrinsic( |
1446 |
match(IVInc, m_ExtractValue<0>(m_Intrinsic( |
| 1447 |
m_Instruction(LHS), m_Constant(Step))))) |
1447 |
m_Instruction(LHS), m_Constant(Step))))) |
| 1448 |
return true; |
1448 |
return true; |
| 1449 |
if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) || |
1449 |
if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) || |
| 1450 |
match(IVInc, m_ExtractValue<0>(m_Intrinsic( |
1450 |
match(IVInc, m_ExtractValue<0>(m_Intrinsic( |
| 1451 |
m_Instruction(LHS), m_Constant(Step))))) { |
1451 |
m_Instruction(LHS), m_Constant(Step))))) { |
| 1452 |
Step = ConstantExpr::getNeg(Step); |
1452 |
Step = ConstantExpr::getNeg(Step); |
| 1453 |
return true; |
1453 |
return true; |
| 1454 |
} |
1454 |
} |
| 1455 |
return false; |
1455 |
return false; |
| 1456 |
} |
1456 |
} |
| 1457 |
|
1457 |
|
| 1458 |
/// If given \p PN is an inductive variable with value IVInc coming from the |
1458 |
/// If given \p PN is an inductive variable with value IVInc coming from the |
| 1459 |
/// backedge, and on each iteration it gets increased by Step, return pair |
1459 |
/// backedge, and on each iteration it gets increased by Step, return pair |
| 1460 |
/// . Otherwise, return std::nullopt. |
1460 |
/// . Otherwise, return std::nullopt. |
| 1461 |
static std::optional> |
1461 |
static std::optional> |
| 1462 |
getIVIncrement(const PHINode *PN, const LoopInfo *LI) { |
1462 |
getIVIncrement(const PHINode *PN, const LoopInfo *LI) { |
| 1463 |
const Loop *L = LI->getLoopFor(PN->getParent()); |
1463 |
const Loop *L = LI->getLoopFor(PN->getParent()); |
| 1464 |
if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) |
1464 |
if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) |
| 1465 |
return std::nullopt; |
1465 |
return std::nullopt; |
| 1466 |
auto *IVInc = |
1466 |
auto *IVInc = |
| 1467 |
dyn_cast(PN->getIncomingValueForBlock(L->getLoopLatch())); |
1467 |
dyn_cast(PN->getIncomingValueForBlock(L->getLoopLatch())); |
| 1468 |
if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L) |
1468 |
if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L) |
| 1469 |
return std::nullopt; |
1469 |
return std::nullopt; |
| 1470 |
Instruction *LHS = nullptr; |
1470 |
Instruction *LHS = nullptr; |
| 1471 |
Constant *Step = nullptr; |
1471 |
Constant *Step = nullptr; |
| 1472 |
if (matchIncrement(IVInc, LHS, Step) && LHS == PN) |
1472 |
if (matchIncrement(IVInc, LHS, Step) && LHS == PN) |
| 1473 |
return std::make_pair(IVInc, Step); |
1473 |
return std::make_pair(IVInc, Step); |
| 1474 |
return std::nullopt; |
1474 |
return std::nullopt; |
| 1475 |
} |
1475 |
} |
| 1476 |
|
1476 |
|
| 1477 |
static bool isIVIncrement(const Value *V, const LoopInfo *LI) { |
1477 |
static bool isIVIncrement(const Value *V, const LoopInfo *LI) { |
| 1478 |
auto *I = dyn_cast<Instruction>(V); |
1478 |
auto *I = dyn_cast<Instruction>(V); |
| 1479 |
if (!I) |
1479 |
if (!I) |
| 1480 |
return false; |
1480 |
return false; |
| 1481 |
Instruction *LHS = nullptr; |
1481 |
Instruction *LHS = nullptr; |
| 1482 |
Constant *Step = nullptr; |
1482 |
Constant *Step = nullptr; |
| 1483 |
if (!matchIncrement(I, LHS, Step)) |
1483 |
if (!matchIncrement(I, LHS, Step)) |
| 1484 |
return false; |
1484 |
return false; |
| 1485 |
if (auto *PN = dyn_cast<PHINode>(LHS)) |
1485 |
if (auto *PN = dyn_cast<PHINode>(LHS)) |
| 1486 |
if (auto IVInc = getIVIncrement(PN, LI)) |
1486 |
if (auto IVInc = getIVIncrement(PN, LI)) |
| 1487 |
return IVInc->first == I; |
1487 |
return IVInc->first == I; |
| 1488 |
return false; |
1488 |
return false; |
| 1489 |
} |
1489 |
} |
| 1490 |
|
1490 |
|
| 1491 |
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, |
1491 |
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, |
| 1492 |
Value *Arg0, Value *Arg1, |
1492 |
Value *Arg0, Value *Arg1, |
| 1493 |
CmpInst *Cmp, |
1493 |
CmpInst *Cmp, |
| 1494 |
Intrinsic::ID IID) { |
1494 |
Intrinsic::ID IID) { |
| 1495 |
auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) { |
1495 |
auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) { |
| 1496 |
if (!isIVIncrement(BO, LI)) |
1496 |
if (!isIVIncrement(BO, LI)) |
| 1497 |
return false; |
1497 |
return false; |
| 1498 |
const Loop *L = LI->getLoopFor(BO->getParent()); |
1498 |
const Loop *L = LI->getLoopFor(BO->getParent()); |
| 1499 |
assert(L && "L should not be null after isIVIncrement()"); |
1499 |
assert(L && "L should not be null after isIVIncrement()"); |
| 1500 |
// Do not risk moving the increment into a child loop. |
1500 |
// Do not risk moving the increment into a child loop. |
| 1501 |
if (LI->getLoopFor(Cmp->getParent()) != L) |
1501 |
if (LI->getLoopFor(Cmp->getParent()) != L) |
| 1502 |
return false; |
1502 |
return false; |
| 1503 |
|
1503 |
|
| 1504 |
// Finally, we need to ensure that the insert point will dominate all |
1504 |
// Finally, we need to ensure that the insert point will dominate all |
| 1505 |
// existing uses of the increment. |
1505 |
// existing uses of the increment. |
| 1506 |
|
1506 |
|
| 1507 |
auto &DT = getDT(*BO->getParent()->getParent()); |
1507 |
auto &DT = getDT(*BO->getParent()->getParent()); |
| 1508 |
if (DT.dominates(Cmp->getParent(), BO->getParent())) |
1508 |
if (DT.dominates(Cmp->getParent(), BO->getParent())) |
| 1509 |
// If we're moving up the dom tree, all uses are trivially dominated. |
1509 |
// If we're moving up the dom tree, all uses are trivially dominated. |
| 1510 |
// (This is the common case for code produced by LSR.) |
1510 |
// (This is the common case for code produced by LSR.) |
| 1511 |
return true; |
1511 |
return true; |
| 1512 |
|
1512 |
|
| 1513 |
// Otherwise, special case the single use in the phi recurrence. |
1513 |
// Otherwise, special case the single use in the phi recurrence. |
| 1514 |
return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch()); |
1514 |
return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch()); |
| 1515 |
}; |
1515 |
}; |
| 1516 |
if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) { |
1516 |
if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) { |
| 1517 |
// We used to use a dominator tree here to allow multi-block optimization. |
1517 |
// We used to use a dominator tree here to allow multi-block optimization. |
| 1518 |
// But that was problematic because: |
1518 |
// But that was problematic because: |
| 1519 |
// 1. It could cause a perf regression by hoisting the math op into the |
1519 |
// 1. It could cause a perf regression by hoisting the math op into the |
| 1520 |
// critical path. |
1520 |
// critical path. |
| 1521 |
// 2. It could cause a perf regression by creating a value that was live |
1521 |
// 2. It could cause a perf regression by creating a value that was live |
| 1522 |
// across multiple blocks and increasing register pressure. |
1522 |
// across multiple blocks and increasing register pressure. |
| 1523 |
// 3. Use of a dominator tree could cause large compile-time regression. |
1523 |
// 3. Use of a dominator tree could cause large compile-time regression. |
| 1524 |
// This is because we recompute the DT on every change in the main CGP |
1524 |
// This is because we recompute the DT on every change in the main CGP |
| 1525 |
// run-loop. The recomputing is probably unnecessary in many cases, so if |
1525 |
// run-loop. The recomputing is probably unnecessary in many cases, so if |
| 1526 |
// that was fixed, using a DT here would be ok. |
1526 |
// that was fixed, using a DT here would be ok. |
| 1527 |
// |
1527 |
// |
| 1528 |
// There is one important particular case we still want to handle: if BO is |
1528 |
// There is one important particular case we still want to handle: if BO is |
| 1529 |
// the IV increment. Important properties that make it profitable: |
1529 |
// the IV increment. Important properties that make it profitable: |
| 1530 |
// - We can speculate IV increment anywhere in the loop (as long as the |
1530 |
// - We can speculate IV increment anywhere in the loop (as long as the |
| 1531 |
// indvar Phi is its only user); |
1531 |
// indvar Phi is its only user); |
| 1532 |
// - Upon computing Cmp, we effectively compute something equivalent to the |
1532 |
// - Upon computing Cmp, we effectively compute something equivalent to the |
| 1533 |
// IV increment (despite it looking different in the IR). So moving it up |
1533 |
// IV increment (despite it looking different in the IR). So moving it up |
| 1534 |
// to the cmp point does not really increase register pressure. |
1534 |
// to the cmp point does not really increase register pressure. |
| 1535 |
return false; |
1535 |
return false; |
| 1536 |
} |
1536 |
} |
| 1537 |
|
1537 |
|
| 1538 |
// We allow matching the canonical IR (add X, C) back to (usubo X, -C). |
1538 |
// We allow matching the canonical IR (add X, C) back to (usubo X, -C). |
| 1539 |
if (BO->getOpcode() == Instruction::Add && |
1539 |
if (BO->getOpcode() == Instruction::Add && |
| 1540 |
IID == Intrinsic::usub_with_overflow) { |
1540 |
IID == Intrinsic::usub_with_overflow) { |
| 1541 |
assert(isa<Constant>(Arg1) && "Unexpected input for usubo"); |
1541 |
assert(isa<Constant>(Arg1) && "Unexpected input for usubo"); |
| 1542 |
Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1)); |
1542 |
Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1)); |
| 1543 |
} |
1543 |
} |
| 1544 |
|
1544 |
|
| 1545 |
// Insert at the first instruction of the pair. |
1545 |
// Insert at the first instruction of the pair. |
| 1546 |
Instruction *InsertPt = nullptr; |
1546 |
Instruction *InsertPt = nullptr; |
| 1547 |
for (Instruction &Iter : *Cmp->getParent()) { |
1547 |
for (Instruction &Iter : *Cmp->getParent()) { |
| 1548 |
// If BO is an XOR, it is not guaranteed that it comes after both inputs to |
1548 |
// If BO is an XOR, it is not guaranteed that it comes after both inputs to |
| 1549 |
// the overflow intrinsic are defined. |
1549 |
// the overflow intrinsic are defined. |
| 1550 |
if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) { |
1550 |
if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) { |
| 1551 |
InsertPt = &Iter; |
1551 |
InsertPt = &Iter; |
| 1552 |
break; |
1552 |
break; |
| 1553 |
} |
1553 |
} |
| 1554 |
} |
1554 |
} |
| 1555 |
assert(InsertPt != nullptr && "Parent block did not contain cmp or binop"); |
1555 |
assert(InsertPt != nullptr && "Parent block did not contain cmp or binop"); |
| 1556 |
|
1556 |
|
| 1557 |
IRBuilder<> Builder(InsertPt); |
1557 |
IRBuilder<> Builder(InsertPt); |
| 1558 |
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); |
1558 |
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); |
| 1559 |
if (BO->getOpcode() != Instruction::Xor) { |
1559 |
if (BO->getOpcode() != Instruction::Xor) { |
| 1560 |
Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); |
1560 |
Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); |
| 1561 |
replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc); |
1561 |
replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc); |
| 1562 |
} else |
1562 |
} else |
| 1563 |
assert(BO->hasOneUse() && |
1563 |
assert(BO->hasOneUse() && |
| 1564 |
"Patterns with XOr should use the BO only in the compare"); |
1564 |
"Patterns with XOr should use the BO only in the compare"); |
| 1565 |
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); |
1565 |
Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); |
| 1566 |
replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc); |
1566 |
replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc); |
| 1567 |
Cmp->eraseFromParent(); |
1567 |
Cmp->eraseFromParent(); |
| 1568 |
BO->eraseFromParent(); |
1568 |
BO->eraseFromParent(); |
| 1569 |
return true; |
1569 |
return true; |
| 1570 |
} |
1570 |
} |
| 1571 |
|
1571 |
|
| 1572 |
/// Match special-case patterns that check for unsigned add overflow. |
1572 |
/// Match special-case patterns that check for unsigned add overflow. |
| 1573 |
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, |
1573 |
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, |
| 1574 |
BinaryOperator *&Add) { |
1574 |
BinaryOperator *&Add) { |
| 1575 |
// Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) |
1575 |
// Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) |
| 1576 |
// Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) |
1576 |
// Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) |
| 1577 |
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); |
1577 |
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); |
| 1578 |
|
1578 |
|
| 1579 |
// We are not expecting non-canonical/degenerate code. Just bail out. |
1579 |
// We are not expecting non-canonical/degenerate code. Just bail out. |
| 1580 |
if (isa<Constant>(A)) |
1580 |
if (isa<Constant>(A)) |
| 1581 |
return false; |
1581 |
return false; |
| 1582 |
|
1582 |
|
| 1583 |
ICmpInst::Predicate Pred = Cmp->getPredicate(); |
1583 |
ICmpInst::Predicate Pred = Cmp->getPredicate(); |
| 1584 |
if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes())) |
1584 |
if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes())) |
| 1585 |
B = ConstantInt::get(B->getType(), 1); |
1585 |
B = ConstantInt::get(B->getType(), 1); |
| 1586 |
else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) |
1586 |
else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) |
| 1587 |
B = ConstantInt::get(B->getType(), -1); |
1587 |
B = ConstantInt::get(B->getType(), -1); |
| 1588 |
else |
1588 |
else |
| 1589 |
return false; |
1589 |
return false; |
| 1590 |
|
1590 |
|
| 1591 |
// Check the users of the variable operand of the compare looking for an add |
1591 |
// Check the users of the variable operand of the compare looking for an add |
| 1592 |
// with the adjusted constant. |
1592 |
// with the adjusted constant. |
| 1593 |
for (User *U : A->users()) { |
1593 |
for (User *U : A->users()) { |
| 1594 |
if (match(U, m_Add(m_Specific(A), m_Specific(B)))) { |
1594 |
if (match(U, m_Add(m_Specific(A), m_Specific(B)))) { |
| 1595 |
Add = cast<BinaryOperator>(U); |
1595 |
Add = cast<BinaryOperator>(U); |
| 1596 |
return true; |
1596 |
return true; |
| 1597 |
} |
1597 |
} |
| 1598 |
} |
1598 |
} |
| 1599 |
return false; |
1599 |
return false; |
| 1600 |
} |
1600 |
} |
| 1601 |
|
1601 |
|
| 1602 |
/// Try to combine the compare into a call to the llvm.uadd.with.overflow |
1602 |
/// Try to combine the compare into a call to the llvm.uadd.with.overflow |
| 1603 |
/// intrinsic. Return true if any changes were made. |
1603 |
/// intrinsic. Return true if any changes were made. |
| 1604 |
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, |
1604 |
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, |
| 1605 |
ModifyDT &ModifiedDT) { |
1605 |
ModifyDT &ModifiedDT) { |
| 1606 |
bool EdgeCase = false; |
1606 |
bool EdgeCase = false; |
| 1607 |
Value *A, *B; |
1607 |
Value *A, *B; |
| 1608 |
BinaryOperator *Add; |
1608 |
BinaryOperator *Add; |
| 1609 |
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) { |
1609 |
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) { |
| 1610 |
if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) |
1610 |
if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) |
| 1611 |
return false; |
1611 |
return false; |
| 1612 |
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases. |
1612 |
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases. |
| 1613 |
A = Add->getOperand(0); |
1613 |
A = Add->getOperand(0); |
| 1614 |
B = Add->getOperand(1); |
1614 |
B = Add->getOperand(1); |
| 1615 |
EdgeCase = true; |
1615 |
EdgeCase = true; |
| 1616 |
} |
1616 |
} |
| 1617 |
|
1617 |
|
| 1618 |
if (!TLI->shouldFormOverflowOp(ISD::UADDO, |
1618 |
if (!TLI->shouldFormOverflowOp(ISD::UADDO, |
| 1619 |
TLI->getValueType(*DL, Add->getType()), |
1619 |
TLI->getValueType(*DL, Add->getType()), |
| 1620 |
Add->hasNUsesOrMore(EdgeCase ? 1 : 2))) |
1620 |
Add->hasNUsesOrMore(EdgeCase ? 1 : 2))) |
| 1621 |
return false; |
1621 |
return false; |
| 1622 |
|
1622 |
|
| 1623 |
// We don't want to move around uses of condition values this late, so we |
1623 |
// We don't want to move around uses of condition values this late, so we |
| 1624 |
// check if it is legal to create the call to the intrinsic in the basic |
1624 |
// check if it is legal to create the call to the intrinsic in the basic |
| 1625 |
// block containing the icmp. |
1625 |
// block containing the icmp. |
| 1626 |
if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) |
1626 |
if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) |
| 1627 |
return false; |
1627 |
return false; |
| 1628 |
|
1628 |
|
| 1629 |
if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp, |
1629 |
if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp, |
| 1630 |
Intrinsic::uadd_with_overflow)) |
1630 |
Intrinsic::uadd_with_overflow)) |
| 1631 |
return false; |
1631 |
return false; |
| 1632 |
|
1632 |
|
| 1633 |
// Reset callers - do not crash by iterating over a dead instruction. |
1633 |
// Reset callers - do not crash by iterating over a dead instruction. |
| 1634 |
ModifiedDT = ModifyDT::ModifyInstDT; |
1634 |
ModifiedDT = ModifyDT::ModifyInstDT; |
| 1635 |
return true; |
1635 |
return true; |
| 1636 |
} |
1636 |
} |
| 1637 |
|
1637 |
|
| 1638 |
bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, |
1638 |
bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, |
| 1639 |
ModifyDT &ModifiedDT) { |
1639 |
ModifyDT &ModifiedDT) { |
| 1640 |
// We are not expecting non-canonical/degenerate code. Just bail out. |
1640 |
// We are not expecting non-canonical/degenerate code. Just bail out. |
| 1641 |
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); |
1641 |
Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); |
| 1642 |
if (isa<Constant>(A) && isa<Constant>(B)) |
1642 |
if (isa<Constant>(A) && isa<Constant>(B)) |
| 1643 |
return false; |
1643 |
return false; |
| 1644 |
|
1644 |
|
| 1645 |
// Convert (A u> B) to (A u< B) to simplify pattern matching. |
1645 |
// Convert (A u> B) to (A u< B) to simplify pattern matching. |
| 1646 |
ICmpInst::Predicate Pred = Cmp->getPredicate(); |
1646 |
ICmpInst::Predicate Pred = Cmp->getPredicate(); |
| 1647 |
if (Pred == ICmpInst::ICMP_UGT) { |
1647 |
if (Pred == ICmpInst::ICMP_UGT) { |
| 1648 |
std::swap(A, B); |
1648 |
std::swap(A, B); |
| 1649 |
Pred = ICmpInst::ICMP_ULT; |
1649 |
Pred = ICmpInst::ICMP_ULT; |
| 1650 |
} |
1650 |
} |
| 1651 |
// Convert special-case: (A == 0) is the same as (A u< 1). |
1651 |
// Convert special-case: (A == 0) is the same as (A u< 1). |
| 1652 |
if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) { |
1652 |
if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) { |
| 1653 |
B = ConstantInt::get(B->getType(), 1); |
1653 |
B = ConstantInt::get(B->getType(), 1); |
| 1654 |
Pred = ICmpInst::ICMP_ULT; |
1654 |
Pred = ICmpInst::ICMP_ULT; |
| 1655 |
} |
1655 |
} |
| 1656 |
// Convert special-case: (A != 0) is the same as (0 u< A). |
1656 |
// Convert special-case: (A != 0) is the same as (0 u< A). |
| 1657 |
if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) { |
1657 |
if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) { |
| 1658 |
std::swap(A, B); |
1658 |
std::swap(A, B); |
| 1659 |
Pred = ICmpInst::ICMP_ULT; |
1659 |
Pred = ICmpInst::ICMP_ULT; |
| 1660 |
} |
1660 |
} |
| 1661 |
if (Pred != ICmpInst::ICMP_ULT) |
1661 |
if (Pred != ICmpInst::ICMP_ULT) |
| 1662 |
return false; |
1662 |
return false; |
| 1663 |
|
1663 |
|
| 1664 |
// Walk the users of a variable operand of a compare looking for a subtract or |
1664 |
// Walk the users of a variable operand of a compare looking for a subtract or |
| 1665 |
// add with that same operand. Also match the 2nd operand of the compare to |
1665 |
// add with that same operand. Also match the 2nd operand of the compare to |
| 1666 |
// the add/sub, but that may be a negated constant operand of an add. |
1666 |
// the add/sub, but that may be a negated constant operand of an add. |
| 1667 |
Value *CmpVariableOperand = isa<Constant>(A) ? B : A; |
1667 |
Value *CmpVariableOperand = isa<Constant>(A) ? B : A; |
| 1668 |
BinaryOperator *Sub = nullptr; |
1668 |
BinaryOperator *Sub = nullptr; |
| 1669 |
for (User *U : CmpVariableOperand->users()) { |
1669 |
for (User *U : CmpVariableOperand->users()) { |
| 1670 |
// A - B, A u< B --> usubo(A, B) |
1670 |
// A - B, A u< B --> usubo(A, B) |
| 1671 |
if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) { |
1671 |
if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) { |
| 1672 |
Sub = cast<BinaryOperator>(U); |
1672 |
Sub = cast<BinaryOperator>(U); |
| 1673 |
break; |
1673 |
break; |
| 1674 |
} |
1674 |
} |
| 1675 |
|
1675 |
|
| 1676 |
// A + (-C), A u< C (canonicalized form of (sub A, C)) |
1676 |
// A + (-C), A u< C (canonicalized form of (sub A, C)) |
| 1677 |
const APInt *CmpC, *AddC; |
1677 |
const APInt *CmpC, *AddC; |
| 1678 |
if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) && |
1678 |
if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) && |
| 1679 |
match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) { |
1679 |
match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) { |
| 1680 |
Sub = cast<BinaryOperator>(U); |
1680 |
Sub = cast<BinaryOperator>(U); |
| 1681 |
break; |
1681 |
break; |
| 1682 |
} |
1682 |
} |
| 1683 |
} |
1683 |
} |
| 1684 |
if (!Sub) |
1684 |
if (!Sub) |
| 1685 |
return false; |
1685 |
return false; |
| 1686 |
|
1686 |
|
| 1687 |
if (!TLI->shouldFormOverflowOp(ISD::USUBO, |
1687 |
if (!TLI->shouldFormOverflowOp(ISD::USUBO, |
| 1688 |
TLI->getValueType(*DL, Sub->getType()), |
1688 |
TLI->getValueType(*DL, Sub->getType()), |
| 1689 |
Sub->hasNUsesOrMore(1))) |
1689 |
Sub->hasNUsesOrMore(1))) |
| 1690 |
return false; |
1690 |
return false; |
| 1691 |
|
1691 |
|
| 1692 |
if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1), |
1692 |
if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1), |
| 1693 |
Cmp, Intrinsic::usub_with_overflow)) |
1693 |
Cmp, Intrinsic::usub_with_overflow)) |
| 1694 |
return false; |
1694 |
return false; |
| 1695 |
|
1695 |
|
| 1696 |
// Reset callers - do not crash by iterating over a dead instruction. |
1696 |
// Reset callers - do not crash by iterating over a dead instruction. |
| 1697 |
ModifiedDT = ModifyDT::ModifyInstDT; |
1697 |
ModifiedDT = ModifyDT::ModifyInstDT; |
| 1698 |
return true; |
1698 |
return true; |
| 1699 |
} |
1699 |
} |
| 1700 |
|
1700 |
|
| 1701 |
/// Sink the given CmpInst into user blocks to reduce the number of virtual |
1701 |
/// Sink the given CmpInst into user blocks to reduce the number of virtual |
| 1702 |
/// registers that must be created and coalesced. This is a clear win except on |
1702 |
/// registers that must be created and coalesced. This is a clear win except on |
| 1703 |
/// targets with multiple condition code registers (PowerPC), where it might |
1703 |
/// targets with multiple condition code registers (PowerPC), where it might |
| 1704 |
/// lose; some adjustment may be wanted there. |
1704 |
/// lose; some adjustment may be wanted there. |
| 1705 |
/// |
1705 |
/// |
| 1706 |
/// Return true if any changes are made. |
1706 |
/// Return true if any changes are made. |
| 1707 |
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { |
1707 |
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { |
| 1708 |
if (TLI.hasMultipleConditionRegisters()) |
1708 |
if (TLI.hasMultipleConditionRegisters()) |
| 1709 |
return false; |
1709 |
return false; |
| 1710 |
|
1710 |
|
| 1711 |
// Avoid sinking soft-FP comparisons, since this can move them into a loop. |
1711 |
// Avoid sinking soft-FP comparisons, since this can move them into a loop. |
| 1712 |
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) |
1712 |
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) |
| 1713 |
return false; |
1713 |
return false; |
| 1714 |
|
1714 |
|
| 1715 |
// Only insert a cmp in each block once. |
1715 |
// Only insert a cmp in each block once. |
| 1716 |
DenseMap<BasicBlock *, CmpInst *> InsertedCmps; |
1716 |
DenseMap<BasicBlock *, CmpInst *> InsertedCmps; |
| 1717 |
|
1717 |
|
| 1718 |
bool MadeChange = false; |
1718 |
bool MadeChange = false; |
| 1719 |
for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end(); |
1719 |
for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end(); |
| 1720 |
UI != E;) { |
1720 |
UI != E;) { |
| 1721 |
Use &TheUse = UI.getUse(); |
1721 |
Use &TheUse = UI.getUse(); |
| 1722 |
Instruction *User = cast<Instruction>(*UI); |
1722 |
Instruction *User = cast<Instruction>(*UI); |
| 1723 |
|
1723 |
|
| 1724 |
// Preincrement use iterator so we don't invalidate it. |
1724 |
// Preincrement use iterator so we don't invalidate it. |
| 1725 |
++UI; |
1725 |
++UI; |
| 1726 |
|
1726 |
|
| 1727 |
// Don't bother for PHI nodes. |
1727 |
// Don't bother for PHI nodes. |
| 1728 |
if (isa<PHINode>(User)) |
1728 |
if (isa<PHINode>(User)) |
| 1729 |
continue; |
1729 |
continue; |
| 1730 |
|
1730 |
|
| 1731 |
// Figure out which BB this cmp is used in. |
1731 |
// Figure out which BB this cmp is used in. |
| 1732 |
BasicBlock *UserBB = User->getParent(); |
1732 |
BasicBlock *UserBB = User->getParent(); |
| 1733 |
BasicBlock *DefBB = Cmp->getParent(); |
1733 |
BasicBlock *DefBB = Cmp->getParent(); |
| 1734 |
|
1734 |
|
| 1735 |
// If this user is in the same block as the cmp, don't change the cmp. |
1735 |
// If this user is in the same block as the cmp, don't change the cmp. |
| 1736 |
if (UserBB == DefBB) |
1736 |
if (UserBB == DefBB) |
| 1737 |
continue; |
1737 |
continue; |
| 1738 |
|
1738 |
|
| 1739 |
// If we have already inserted a cmp into this block, use it. |
1739 |
// If we have already inserted a cmp into this block, use it. |
| 1740 |
CmpInst *&InsertedCmp = InsertedCmps[UserBB]; |
1740 |
CmpInst *&InsertedCmp = InsertedCmps[UserBB]; |
| 1741 |
|
1741 |
|
| 1742 |
if (!InsertedCmp) { |
1742 |
if (!InsertedCmp) { |
| 1743 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
1743 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
| 1744 |
assert(InsertPt != UserBB->end()); |
1744 |
assert(InsertPt != UserBB->end()); |
| 1745 |
InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), |
1745 |
InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), |
| 1746 |
Cmp->getOperand(0), Cmp->getOperand(1), "", |
1746 |
Cmp->getOperand(0), Cmp->getOperand(1), "", |
| 1747 |
&*InsertPt); |
1747 |
&*InsertPt); |
| 1748 |
// Propagate the debug info. |
1748 |
// Propagate the debug info. |
| 1749 |
InsertedCmp->setDebugLoc(Cmp->getDebugLoc()); |
1749 |
InsertedCmp->setDebugLoc(Cmp->getDebugLoc()); |
| 1750 |
} |
1750 |
} |
| 1751 |
|
1751 |
|
| 1752 |
// Replace a use of the cmp with a use of the new cmp. |
1752 |
// Replace a use of the cmp with a use of the new cmp. |
| 1753 |
TheUse = InsertedCmp; |
1753 |
TheUse = InsertedCmp; |
| 1754 |
MadeChange = true; |
1754 |
MadeChange = true; |
| 1755 |
++NumCmpUses; |
1755 |
++NumCmpUses; |
| 1756 |
} |
1756 |
} |
| 1757 |
|
1757 |
|
| 1758 |
// If we removed all uses, nuke the cmp. |
1758 |
// If we removed all uses, nuke the cmp. |
| 1759 |
if (Cmp->use_empty()) { |
1759 |
if (Cmp->use_empty()) { |
| 1760 |
Cmp->eraseFromParent(); |
1760 |
Cmp->eraseFromParent(); |
| 1761 |
MadeChange = true; |
1761 |
MadeChange = true; |
| 1762 |
} |
1762 |
} |
| 1763 |
|
1763 |
|
| 1764 |
return MadeChange; |
1764 |
return MadeChange; |
| 1765 |
} |
1765 |
} |
| 1766 |
|
1766 |
|
| 1767 |
/// For pattern like: |
1767 |
/// For pattern like: |
| 1768 |
/// |
1768 |
/// |
| 1769 |
/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB) |
1769 |
/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB) |
| 1770 |
/// ... |
1770 |
/// ... |
| 1771 |
/// DomBB: |
1771 |
/// DomBB: |
| 1772 |
/// ... |
1772 |
/// ... |
| 1773 |
/// br DomCond, TrueBB, CmpBB |
1773 |
/// br DomCond, TrueBB, CmpBB |
| 1774 |
/// CmpBB: (with DomBB being the single predecessor) |
1774 |
/// CmpBB: (with DomBB being the single predecessor) |
| 1775 |
/// ... |
1775 |
/// ... |
| 1776 |
/// Cmp = icmp eq CmpOp0, CmpOp1 |
1776 |
/// Cmp = icmp eq CmpOp0, CmpOp1 |
| 1777 |
/// ... |
1777 |
/// ... |
| 1778 |
/// |
1778 |
/// |
| 1779 |
/// It would use two comparisons on targets where lowering of icmp sgt/slt is |
1779 |
/// It would use two comparisons on targets where lowering of icmp sgt/slt is |
| 1780 |
/// different from lowering of icmp eq (PowerPC). This function tries to convert |
1780 |
/// different from lowering of icmp eq (PowerPC). This function tries to convert |
| 1781 |
/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'. |
1781 |
/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'. |
| 1782 |
/// After that, DomCond and Cmp can use the same comparison so reduce one |
1782 |
/// After that, DomCond and Cmp can use the same comparison so reduce one |
| 1783 |
/// comparison. |
1783 |
/// comparison. |
| 1784 |
/// |
1784 |
/// |
| 1785 |
/// Return true if any changes are made. |
1785 |
/// Return true if any changes are made. |
| 1786 |
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, |
1786 |
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, |
| 1787 |
const TargetLowering &TLI) { |
1787 |
const TargetLowering &TLI) { |
| 1788 |
if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp()) |
1788 |
if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp()) |
| 1789 |
return false; |
1789 |
return false; |
| 1790 |
|
1790 |
|
| 1791 |
ICmpInst::Predicate Pred = Cmp->getPredicate(); |
1791 |
ICmpInst::Predicate Pred = Cmp->getPredicate(); |
| 1792 |
if (Pred != ICmpInst::ICMP_EQ) |
1792 |
if (Pred != ICmpInst::ICMP_EQ) |
| 1793 |
return false; |
1793 |
return false; |
| 1794 |
|
1794 |
|
| 1795 |
// If icmp eq has users other than BranchInst and SelectInst, converting it to |
1795 |
// If icmp eq has users other than BranchInst and SelectInst, converting it to |
| 1796 |
// icmp slt/sgt would introduce more redundant LLVM IR. |
1796 |
// icmp slt/sgt would introduce more redundant LLVM IR. |
| 1797 |
for (User *U : Cmp->users()) { |
1797 |
for (User *U : Cmp->users()) { |
| 1798 |
if (isa<BranchInst>(U)) |
1798 |
if (isa<BranchInst>(U)) |
| 1799 |
continue; |
1799 |
continue; |
| 1800 |
if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp) |
1800 |
if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp) |
| 1801 |
continue; |
1801 |
continue; |
| 1802 |
return false; |
1802 |
return false; |
| 1803 |
} |
1803 |
} |
| 1804 |
|
1804 |
|
| 1805 |
// This is a cheap/incomplete check for dominance - just match a single |
1805 |
// This is a cheap/incomplete check for dominance - just match a single |
| 1806 |
// predecessor with a conditional branch. |
1806 |
// predecessor with a conditional branch. |
| 1807 |
BasicBlock *CmpBB = Cmp->getParent(); |
1807 |
BasicBlock *CmpBB = Cmp->getParent(); |
| 1808 |
BasicBlock *DomBB = CmpBB->getSinglePredecessor(); |
1808 |
BasicBlock *DomBB = CmpBB->getSinglePredecessor(); |
| 1809 |
if (!DomBB) |
1809 |
if (!DomBB) |
| 1810 |
return false; |
1810 |
return false; |
| 1811 |
|
1811 |
|
| 1812 |
// We want to ensure that the only way control gets to the comparison of |
1812 |
// We want to ensure that the only way control gets to the comparison of |
| 1813 |
// interest is that a less/greater than comparison on the same operands is |
1813 |
// interest is that a less/greater than comparison on the same operands is |
| 1814 |
// false. |
1814 |
// false. |
| 1815 |
Value *DomCond; |
1815 |
Value *DomCond; |
| 1816 |
BasicBlock *TrueBB, *FalseBB; |
1816 |
BasicBlock *TrueBB, *FalseBB; |
| 1817 |
if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB))) |
1817 |
if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB))) |
| 1818 |
return false; |
1818 |
return false; |
| 1819 |
if (CmpBB != FalseBB) |
1819 |
if (CmpBB != FalseBB) |
| 1820 |
return false; |
1820 |
return false; |
| 1821 |
|
1821 |
|
| 1822 |
Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1); |
1822 |
Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1); |
| 1823 |
ICmpInst::Predicate DomPred; |
1823 |
ICmpInst::Predicate DomPred; |
| 1824 |
if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1)))) |
1824 |
if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1)))) |
| 1825 |
return false; |
1825 |
return false; |
| 1826 |
if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT) |
1826 |
if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT) |
| 1827 |
return false; |
1827 |
return false; |
| 1828 |
|
1828 |
|
| 1829 |
// Convert the equality comparison to the opposite of the dominating |
1829 |
// Convert the equality comparison to the opposite of the dominating |
| 1830 |
// comparison and swap the direction for all branch/select users. |
1830 |
// comparison and swap the direction for all branch/select users. |
| 1831 |
// We have conceptually converted: |
1831 |
// We have conceptually converted: |
| 1832 |
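// Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; |
1832 |
// Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; |
| 1833 |
// to |
1833 |
// to |
| 1834 |
// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; |
1834 |
// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; |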
| 1835 |
// And similarly for branches. |
1835 |
// And similarly for branches. |
| 1836 |
for (User *U : Cmp->users()) { |
1836 |
for (User *U : Cmp->users()) { |
| 1837 |
if (auto *BI = dyn_cast<BranchInst>(U)) { |
1837 |
if (auto *BI = dyn_cast<BranchInst>(U)) { |
| 1838 |
assert(BI->isConditional() && "Must be conditional"); |
1838 |
assert(BI->isConditional() && "Must be conditional"); |
| 1839 |
BI->swapSuccessors(); |
1839 |
BI->swapSuccessors(); |
| 1840 |
continue; |
1840 |
continue; |
| 1841 |
} |
1841 |
} |
| 1842 |
if (auto *SI = dyn_cast<SelectInst>(U)) { |
1842 |
if (auto *SI = dyn_cast<SelectInst>(U)) { |
| 1843 |
// Swap operands |
1843 |
// Swap operands |
| 1844 |
SI->swapValues(); |
1844 |
SI->swapValues(); |
| 1845 |
SI->swapProfMetadata(); |
1845 |
SI->swapProfMetadata(); |
| 1846 |
continue; |
1846 |
continue; |
| 1847 |
} |
1847 |
} |
| 1848 |
llvm_unreachable("Must be a branch or a select"); |
1848 |
llvm_unreachable("Must be a branch or a select"); |
| 1849 |
} |
1849 |
} |
| 1850 |
Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred)); |
1850 |
Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred)); |
| 1851 |
return true; |
1851 |
return true; |
| 1852 |
} |
1852 |
} |
| 1853 |
|
1853 |
|
| 1854 |
/// Many architectures use the same instruction for both subtract and cmp. Try |
1854 |
/// Many architectures use the same instruction for both subtract and cmp. Try |
| 1855 |
/// to swap cmp operands to match subtract operations to allow for CSE. |
1855 |
/// to swap cmp operands to match subtract operations to allow for CSE. |
| 1856 |
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) { |
1856 |
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) { |
| 1857 |
Value *Op0 = Cmp->getOperand(0); |
1857 |
Value *Op0 = Cmp->getOperand(0); |
| 1858 |
Value *Op1 = Cmp->getOperand(1); |
1858 |
Value *Op1 = Cmp->getOperand(1); |
| 1859 |
if (!Op0->getType()->isIntegerTy() || isa(Op0) || |
1859 |
if (!Op0->getType()->isIntegerTy() || isa(Op0) || |
| 1860 |
isa(Op1) || Op0 == Op1) |
1860 |
isa(Op1) || Op0 == Op1) |
| 1861 |
return false; |
1861 |
return false; |
| 1862 |
|
1862 |
|
| 1863 |
// If a subtract already has the same operands as a compare, swapping would be |
1863 |
// If a subtract already has the same operands as a compare, swapping would be |
| 1864 |
// bad. If a subtract has the same operands as a compare but in reverse order, |
1864 |
// bad. If a subtract has the same operands as a compare but in reverse order, |
| 1865 |
// then swapping is good. |
1865 |
// then swapping is good. |
| 1866 |
int GoodToSwap = 0; |
1866 |
int GoodToSwap = 0; |
| 1867 |
unsigned NumInspected = 0; |
1867 |
unsigned NumInspected = 0; |
| 1868 |
for (const User *U : Op0->users()) { |
1868 |
for (const User *U : Op0->users()) { |
| 1869 |
// Avoid walking many users. |
1869 |
// Avoid walking many users. |
| 1870 |
if (++NumInspected > 128) |
1870 |
if (++NumInspected > 128) |
| 1871 |
return false; |
1871 |
return false; |
| 1872 |
if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0)))) |
1872 |
if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0)))) |
| 1873 |
GoodToSwap++; |
1873 |
GoodToSwap++; |
| 1874 |
else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1)))) |
1874 |
else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1)))) |
| 1875 |
GoodToSwap--; |
1875 |
GoodToSwap--; |
| 1876 |
} |
1876 |
} |
| 1877 |
|
1877 |
|
| 1878 |
if (GoodToSwap > 0) { |
1878 |
if (GoodToSwap > 0) { |
| 1879 |
Cmp->swapOperands(); |
1879 |
Cmp->swapOperands(); |
| 1880 |
return true; |
1880 |
return true; |
| 1881 |
} |
1881 |
} |
| 1882 |
return false; |
1882 |
return false; |
| 1883 |
} |
1883 |
} |
| 1884 |
|
1884 |
|
| 1885 |
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { |
1885 |
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { |
| 1886 |
if (sinkCmpExpression(Cmp, *TLI)) |
1886 |
if (sinkCmpExpression(Cmp, *TLI)) |
| 1887 |
return true; |
1887 |
return true; |
| 1888 |
|
1888 |
|
| 1889 |
if (combineToUAddWithOverflow(Cmp, ModifiedDT)) |
1889 |
if (combineToUAddWithOverflow(Cmp, ModifiedDT)) |
| 1890 |
return true; |
1890 |
return true; |
| 1891 |
|
1891 |
|
| 1892 |
if (combineToUSubWithOverflow(Cmp, ModifiedDT)) |
1892 |
if (combineToUSubWithOverflow(Cmp, ModifiedDT)) |
| 1893 |
return true; |
1893 |
return true; |
| 1894 |
|
1894 |
|
| 1895 |
if (foldICmpWithDominatingICmp(Cmp, *TLI)) |
1895 |
if (foldICmpWithDominatingICmp(Cmp, *TLI)) |
| 1896 |
return true; |
1896 |
return true; |
| 1897 |
|
1897 |
|
| 1898 |
if (swapICmpOperandsToExposeCSEOpportunities(Cmp)) |
1898 |
if (swapICmpOperandsToExposeCSEOpportunities(Cmp)) |
| 1899 |
return true; |
1899 |
return true; |
| 1900 |
|
1900 |
|
| 1901 |
return false; |
1901 |
return false; |
| 1902 |
} |
1902 |
} |
| 1903 |
|
1903 |
|
| 1904 |
/// Duplicate and sink the given 'and' instruction into user blocks where it is |
1904 |
/// Duplicate and sink the given 'and' instruction into user blocks where it is |
| 1905 |
/// used in a compare to allow isel to generate better code for targets where |
1905 |
/// used in a compare to allow isel to generate better code for targets where |
| 1906 |
/// this operation can be combined. |
1906 |
/// this operation can be combined. |
| 1907 |
/// |
1907 |
/// |
| 1908 |
/// Return true if any changes are made. |
1908 |
/// Return true if any changes are made. |
| 1909 |
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, |
1909 |
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, |
| 1910 |
SetOfInstrs &InsertedInsts) { |
1910 |
SetOfInstrs &InsertedInsts) { |
| 1911 |
// Double-check that we're not trying to optimize an instruction that was |
1911 |
// Double-check that we're not trying to optimize an instruction that was |
| 1912 |
// already optimized by some other part of this pass. |
1912 |
// already optimized by some other part of this pass. |
| 1913 |
assert(!InsertedInsts.count(AndI) && |
1913 |
assert(!InsertedInsts.count(AndI) && |
| 1914 |
"Attempting to optimize already optimized and instruction"); |
1914 |
"Attempting to optimize already optimized and instruction"); |
| 1915 |
(void)InsertedInsts; |
1915 |
(void)InsertedInsts; |
| 1916 |
|
1916 |
|
| 1917 |
// Nothing to do for single use in same basic block. |
1917 |
// Nothing to do for single use in same basic block. |
| 1918 |
if (AndI->hasOneUse() && |
1918 |
if (AndI->hasOneUse() && |
| 1919 |
AndI->getParent() == cast(*AndI->user_begin())->getParent()) |
1919 |
AndI->getParent() == cast(*AndI->user_begin())->getParent()) |
| 1920 |
return false; |
1920 |
return false; |
| 1921 |
|
1921 |
|
| 1922 |
// Try to avoid cases where sinking/duplicating is likely to increase register |
1922 |
// Try to avoid cases where sinking/duplicating is likely to increase register |
| 1923 |
// pressure. |
1923 |
// pressure. |
| 1924 |
if (!isa(AndI->getOperand(0)) && |
1924 |
if (!isa(AndI->getOperand(0)) && |
| 1925 |
!isa(AndI->getOperand(1)) && |
1925 |
!isa(AndI->getOperand(1)) && |
| 1926 |
AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse()) |
1926 |
AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse()) |
| 1927 |
return false; |
1927 |
return false; |
| 1928 |
|
1928 |
|
| 1929 |
for (auto *U : AndI->users()) { |
1929 |
for (auto *U : AndI->users()) { |
| 1930 |
Instruction *User = cast(U); |
1930 |
Instruction *User = cast(U); |
| 1931 |
|
1931 |
|
| 1932 |
// Only sink 'and' feeding icmp with 0. |
1932 |
// Only sink 'and' feeding icmp with 0. |
| 1933 |
if (!isa(User)) |
1933 |
if (!isa(User)) |
| 1934 |
return false; |
1934 |
return false; |
| 1935 |
|
1935 |
|
| 1936 |
auto *CmpC = dyn_cast(User->getOperand(1)); |
1936 |
auto *CmpC = dyn_cast(User->getOperand(1)); |
| 1937 |
if (!CmpC || !CmpC->isZero()) |
1937 |
if (!CmpC || !CmpC->isZero()) |
| 1938 |
return false; |
1938 |
return false; |
| 1939 |
} |
1939 |
} |
| 1940 |
|
1940 |
|
| 1941 |
if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) |
1941 |
if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) |
| 1942 |
return false; |
1942 |
return false; |
| 1943 |
|
1943 |
|
| 1944 |
LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); |
1944 |
LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); |
| 1945 |
LLVM_DEBUG(AndI->getParent()->dump()); |
1945 |
LLVM_DEBUG(AndI->getParent()->dump()); |
| 1946 |
|
1946 |
|
| 1947 |
// Push the 'and' into the same block as the icmp 0. There should only be |
1947 |
// Push the 'and' into the same block as the icmp 0. There should only be |
| 1948 |
// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any |
1948 |
// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any |
| 1949 |
// others, so we don't need to keep track of which BBs we insert into. |
1949 |
// others, so we don't need to keep track of which BBs we insert into. |
| 1950 |
for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); |
1950 |
for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); |
| 1951 |
UI != E;) { |
1951 |
UI != E;) { |
| 1952 |
Use &TheUse = UI.getUse(); |
1952 |
Use &TheUse = UI.getUse(); |
| 1953 |
Instruction *User = cast(*UI); |
1953 |
Instruction *User = cast(*UI); |
| 1954 |
|
1954 |
|
| 1955 |
// Preincrement use iterator so we don't invalidate it. |
1955 |
// Preincrement use iterator so we don't invalidate it. |
| 1956 |
++UI; |
1956 |
++UI; |
| 1957 |
|
1957 |
|
| 1958 |
LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); |
1958 |
LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); |
| 1959 |
|
1959 |
|
| 1960 |
// Keep the 'and' in the same place if the use is already in the same block. |
1960 |
// Keep the 'and' in the same place if the use is already in the same block. |
| 1961 |
Instruction *InsertPt = |
1961 |
Instruction *InsertPt = |
| 1962 |
User->getParent() == AndI->getParent() ? AndI : User; |
1962 |
User->getParent() == AndI->getParent() ? AndI : User; |
| 1963 |
Instruction *InsertedAnd = |
1963 |
Instruction *InsertedAnd = |
| 1964 |
BinaryOperator::Create(Instruction::And, AndI->getOperand(0), |
1964 |
BinaryOperator::Create(Instruction::And, AndI->getOperand(0), |
| 1965 |
AndI->getOperand(1), "", InsertPt); |
1965 |
AndI->getOperand(1), "", InsertPt); |
| 1966 |
// Propagate the debug info. |
1966 |
// Propagate the debug info. |
| 1967 |
InsertedAnd->setDebugLoc(AndI->getDebugLoc()); |
1967 |
InsertedAnd->setDebugLoc(AndI->getDebugLoc()); |
| 1968 |
|
1968 |
|
| 1969 |
// Replace a use of the 'and' with a use of the new 'and'. |
1969 |
// Replace a use of the 'and' with a use of the new 'and'. |
| 1970 |
TheUse = InsertedAnd; |
1970 |
TheUse = InsertedAnd; |
| 1971 |
++NumAndUses; |
1971 |
++NumAndUses; |
| 1972 |
LLVM_DEBUG(User->getParent()->dump()); |
1972 |
LLVM_DEBUG(User->getParent()->dump()); |
| 1973 |
} |
1973 |
} |
| 1974 |
|
1974 |
|
| 1975 |
// We removed all uses, nuke the and. |
1975 |
// We removed all uses, nuke the and. |
| 1976 |
AndI->eraseFromParent(); |
1976 |
AndI->eraseFromParent(); |
| 1977 |
return true; |
1977 |
return true; |
| 1978 |
} |
1978 |
} |
| 1979 |
|
1979 |
|
| 1980 |
/// Check if the candidates could be combined with a shift instruction, which |
1980 |
/// Check if the candidates could be combined with a shift instruction, which |
| 1981 |
/// includes: |
1981 |
/// includes: |
| 1982 |
/// 1. Truncate instruction |
1982 |
/// 1. Truncate instruction |
| 1983 |
/// 2. And instruction and the imm is a mask of the low bits: |
1983 |
/// 2. And instruction and the imm is a mask of the low bits: |
| 1984 |
/// imm & (imm+1) == 0 |
1984 |
/// imm & (imm+1) == 0 |
| 1985 |
static bool isExtractBitsCandidateUse(Instruction *User) { |
1985 |
static bool isExtractBitsCandidateUse(Instruction *User) { |
| 1986 |
if (!isa(User)) { |
1986 |
if (!isa(User)) { |
| 1987 |
if (User->getOpcode() != Instruction::And || |
1987 |
if (User->getOpcode() != Instruction::And || |
| 1988 |
!isa(User->getOperand(1))) |
1988 |
!isa(User->getOperand(1))) |
| 1989 |
return false; |
1989 |
return false; |
| 1990 |
|
1990 |
|
| 1991 |
const APInt &Cimm = cast(User->getOperand(1))->getValue(); |
1991 |
const APInt &Cimm = cast(User->getOperand(1))->getValue(); |
| 1992 |
|
1992 |
|
| 1993 |
if ((Cimm & (Cimm + 1)).getBoolValue()) |
1993 |
if ((Cimm & (Cimm + 1)).getBoolValue()) |
| 1994 |
return false; |
1994 |
return false; |
| 1995 |
} |
1995 |
} |
| 1996 |
return true; |
1996 |
return true; |
| 1997 |
} |
1997 |
} |
| 1998 |
|
1998 |
|
| 1999 |
/// Sink both shift and truncate instruction to the use of truncate's BB. |
1999 |
/// Sink both shift and truncate instruction to the use of truncate's BB. |
| 2000 |
static bool |
2000 |
static bool |
| 2001 |
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, |
2001 |
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, |
| 2002 |
DenseMap &InsertedShifts, |
2002 |
DenseMap &InsertedShifts, |
| 2003 |
const TargetLowering &TLI, const DataLayout &DL) { |
2003 |
const TargetLowering &TLI, const DataLayout &DL) { |
| 2004 |
BasicBlock *UserBB = User->getParent(); |
2004 |
BasicBlock *UserBB = User->getParent(); |
| 2005 |
DenseMap InsertedTruncs; |
2005 |
DenseMap InsertedTruncs; |
| 2006 |
auto *TruncI = cast(User); |
2006 |
auto *TruncI = cast(User); |
| 2007 |
bool MadeChange = false; |
2007 |
bool MadeChange = false; |
| 2008 |
|
2008 |
|
| 2009 |
for (Value::user_iterator TruncUI = TruncI->user_begin(), |
2009 |
for (Value::user_iterator TruncUI = TruncI->user_begin(), |
| 2010 |
TruncE = TruncI->user_end(); |
2010 |
TruncE = TruncI->user_end(); |
| 2011 |
TruncUI != TruncE;) { |
2011 |
TruncUI != TruncE;) { |
| 2012 |
|
2012 |
|
| 2013 |
Use &TruncTheUse = TruncUI.getUse(); |
2013 |
Use &TruncTheUse = TruncUI.getUse(); |
| 2014 |
Instruction *TruncUser = cast(*TruncUI); |
2014 |
Instruction *TruncUser = cast(*TruncUI); |
| 2015 |
// Preincrement use iterator so we don't invalidate it. |
2015 |
// Preincrement use iterator so we don't invalidate it. |
| 2016 |
|
2016 |
|
| 2017 |
++TruncUI; |
2017 |
++TruncUI; |
| 2018 |
|
2018 |
|
| 2019 |
int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); |
2019 |
int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); |
| 2020 |
if (!ISDOpcode) |
2020 |
if (!ISDOpcode) |
| 2021 |
continue; |
2021 |
continue; |
| 2022 |
|
2022 |
|
| 2023 |
// If the use is actually a legal node, there will not be an |
2023 |
// If the use is actually a legal node, there will not be an |
| 2024 |
// implicit truncate. |
2024 |
// implicit truncate. |
| 2025 |
// FIXME: always querying the result type is just an |
2025 |
// FIXME: always querying the result type is just an |
| 2026 |
// approximation; some nodes' legality is determined by the |
2026 |
// approximation; some nodes' legality is determined by the |
| 2027 |
// operand or other means. There's no good way to find out though. |
2027 |
// operand or other means. There's no good way to find out though. |
| 2028 |
if (TLI.isOperationLegalOrCustom( |
2028 |
if (TLI.isOperationLegalOrCustom( |
| 2029 |
ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) |
2029 |
ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) |
| 2030 |
continue; |
2030 |
continue; |
| 2031 |
|
2031 |
|
| 2032 |
// Don't bother for PHI nodes. |
2032 |
// Don't bother for PHI nodes. |
| 2033 |
if (isa(TruncUser)) |
2033 |
if (isa(TruncUser)) |
| 2034 |
continue; |
2034 |
continue; |
| 2035 |
|
2035 |
|
| 2036 |
BasicBlock *TruncUserBB = TruncUser->getParent(); |
2036 |
BasicBlock *TruncUserBB = TruncUser->getParent(); |
| 2037 |
|
2037 |
|
| 2038 |
if (UserBB == TruncUserBB) |
2038 |
if (UserBB == TruncUserBB) |
| 2039 |
continue; |
2039 |
continue; |
| 2040 |
|
2040 |
|
| 2041 |
BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; |
2041 |
BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; |
| 2042 |
CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; |
2042 |
CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; |
| 2043 |
|
2043 |
|
| 2044 |
if (!InsertedShift && !InsertedTrunc) { |
2044 |
if (!InsertedShift && !InsertedTrunc) { |
| 2045 |
BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); |
2045 |
BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); |
| 2046 |
assert(InsertPt != TruncUserBB->end()); |
2046 |
assert(InsertPt != TruncUserBB->end()); |
| 2047 |
// Sink the shift |
2047 |
// Sink the shift |
| 2048 |
if (ShiftI->getOpcode() == Instruction::AShr) |
2048 |
if (ShiftI->getOpcode() == Instruction::AShr) |
| 2049 |
InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, |
2049 |
InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, |
| 2050 |
"", &*InsertPt); |
2050 |
"", &*InsertPt); |
| 2051 |
else |
2051 |
else |
| 2052 |
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, |
2052 |
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, |
| 2053 |
"", &*InsertPt); |
2053 |
"", &*InsertPt); |
| 2054 |
InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); |
2054 |
InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); |
| 2055 |
|
2055 |
|
| 2056 |
// Sink the trunc |
2056 |
// Sink the trunc |
| 2057 |
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); |
2057 |
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); |
| 2058 |
TruncInsertPt++; |
2058 |
TruncInsertPt++; |
| 2059 |
assert(TruncInsertPt != TruncUserBB->end()); |
2059 |
assert(TruncInsertPt != TruncUserBB->end()); |
| 2060 |
|
2060 |
|
| 2061 |
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, |
2061 |
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, |
| 2062 |
TruncI->getType(), "", &*TruncInsertPt); |
2062 |
TruncI->getType(), "", &*TruncInsertPt); |
| 2063 |
InsertedTrunc->setDebugLoc(TruncI->getDebugLoc()); |
2063 |
InsertedTrunc->setDebugLoc(TruncI->getDebugLoc()); |
| 2064 |
|
2064 |
|
| 2065 |
MadeChange = true; |
2065 |
MadeChange = true; |
| 2066 |
|
2066 |
|
| 2067 |
TruncTheUse = InsertedTrunc; |
2067 |
TruncTheUse = InsertedTrunc; |
| 2068 |
} |
2068 |
} |
| 2069 |
} |
2069 |
} |
| 2070 |
return MadeChange; |
2070 |
return MadeChange; |
| 2071 |
} |
2071 |
} |
| 2072 |
|
2072 |
|
| 2073 |
/// Sink the shift *right* instruction into user blocks if the uses could |
2073 |
/// Sink the shift *right* instruction into user blocks if the uses could |
| 2074 |
/// potentially be combined with this shift instruction and generate BitExtract |
2074 |
/// potentially be combined with this shift instruction and generate BitExtract |
| 2075 |
/// instruction. It will only be applied if the architecture supports BitExtract |
2075 |
/// instruction. It will only be applied if the architecture supports BitExtract |
| 2076 |
/// instruction. Here is an example: |
2076 |
/// instruction. Here is an example: |
| 2077 |
/// BB1: |
2077 |
/// BB1: |
| 2078 |
/// %x.extract.shift = lshr i64 %arg1, 32 |
2078 |
/// %x.extract.shift = lshr i64 %arg1, 32 |
| 2079 |
/// BB2: |
2079 |
/// BB2: |
| 2080 |
/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 |
2080 |
/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 |
| 2081 |
/// ==> |
2081 |
/// ==> |
| 2082 |
/// |
2082 |
/// |
| 2083 |
/// BB2: |
2083 |
/// BB2: |
| 2084 |
/// %x.extract.shift.1 = lshr i64 %arg1, 32 |
2084 |
/// %x.extract.shift.1 = lshr i64 %arg1, 32 |
| 2085 |
/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 |
2085 |
/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 |
| 2086 |
/// |
2086 |
/// |
| 2087 |
/// CodeGen will recognize the pattern in BB2 and generate BitExtract |
2087 |
/// CodeGen will recognize the pattern in BB2 and generate BitExtract |
| 2088 |
/// instruction. |
2088 |
/// instruction. |
| 2089 |
/// Return true if any changes are made. |
2089 |
/// Return true if any changes are made. |
| 2090 |
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, |
2090 |
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, |
| 2091 |
const TargetLowering &TLI, |
2091 |
const TargetLowering &TLI, |
| 2092 |
const DataLayout &DL) { |
2092 |
const DataLayout &DL) { |
| 2093 |
BasicBlock *DefBB = ShiftI->getParent(); |
2093 |
BasicBlock *DefBB = ShiftI->getParent(); |
| 2094 |
|
2094 |
|
| 2095 |
/// Only insert instructions in each block once. |
2095 |
/// Only insert instructions in each block once. |
| 2096 |
DenseMap InsertedShifts; |
2096 |
DenseMap InsertedShifts; |
| 2097 |
|
2097 |
|
| 2098 |
bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); |
2098 |
bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); |
| 2099 |
|
2099 |
|
| 2100 |
bool MadeChange = false; |
2100 |
bool MadeChange = false; |
| 2101 |
for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); |
2101 |
for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); |
| 2102 |
UI != E;) { |
2102 |
UI != E;) { |
| 2103 |
Use &TheUse = UI.getUse(); |
2103 |
Use &TheUse = UI.getUse(); |
| 2104 |
Instruction *User = cast(*UI); |
2104 |
Instruction *User = cast(*UI); |
| 2105 |
// Preincrement use iterator so we don't invalidate it. |
2105 |
// Preincrement use iterator so we don't invalidate it. |
| 2106 |
++UI; |
2106 |
++UI; |
| 2107 |
|
2107 |
|
| 2108 |
// Don't bother for PHI nodes. |
2108 |
// Don't bother for PHI nodes. |
| 2109 |
if (isa(User)) |
2109 |
if (isa(User)) |
| 2110 |
continue; |
2110 |
continue; |
| 2111 |
|
2111 |
|
| 2112 |
if (!isExtractBitsCandidateUse(User)) |
2112 |
if (!isExtractBitsCandidateUse(User)) |
| 2113 |
continue; |
2113 |
continue; |
| 2114 |
|
2114 |
|
| 2115 |
BasicBlock *UserBB = User->getParent(); |
2115 |
BasicBlock *UserBB = User->getParent(); |
| 2116 |
|
2116 |
|
| 2117 |
if (UserBB == DefBB) { |
2117 |
if (UserBB == DefBB) { |
| 2118 |
// If the shift and truncate instruction are in the same BB. The use of |
2118 |
// If the shift and truncate instruction are in the same BB. The use of |
| 2119 |
// the truncate(TruncUse) may still introduce another truncate if not |
2119 |
// the truncate(TruncUse) may still introduce another truncate if not |
| 2120 |
// legal. In this case, we would like to sink both shift and truncate |
2120 |
// legal. In this case, we would like to sink both shift and truncate |
| 2121 |
// instruction to the BB of TruncUse. |
2121 |
// instruction to the BB of TruncUse. |
| 2122 |
// for example: |
2122 |
// for example: |
| 2123 |
// BB1: |
2123 |
// BB1: |
| 2124 |
// i64 shift.result = lshr i64 opnd, imm |
2124 |
// i64 shift.result = lshr i64 opnd, imm |
| 2125 |
// trunc.result = trunc shift.result to i16 |
2125 |
// trunc.result = trunc shift.result to i16 |
| 2126 |
// |
2126 |
// |
| 2127 |
// BB2: |
2127 |
// BB2: |
| 2128 |
// ----> We will have an implicit truncate here if the architecture does |
2128 |
// ----> We will have an implicit truncate here if the architecture does |
| 2129 |
// not have i16 compare. |
2129 |
// not have i16 compare. |
| 2130 |
// cmp i16 trunc.result, opnd2 |
2130 |
// cmp i16 trunc.result, opnd2 |
| 2131 |
// |
2131 |
// |
| 2132 |
if (isa(User) && |
2132 |
if (isa(User) && |
| 2133 |
shiftIsLegal |
2133 |
shiftIsLegal |
| 2134 |
// If the type of the truncate is legal, no truncate will be |
2134 |
// If the type of the truncate is legal, no truncate will be |
| 2135 |
// introduced in other basic blocks. |
2135 |
// introduced in other basic blocks. |
| 2136 |
&& (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) |
2136 |
&& (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) |
| 2137 |
MadeChange = |
2137 |
MadeChange = |
| 2138 |
SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); |
2138 |
SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); |
| 2139 |
|
2139 |
|
| 2140 |
continue; |
2140 |
continue; |
| 2141 |
} |
2141 |
} |
| 2142 |
// If we have already inserted a shift into this block, use it. |
2142 |
// If we have already inserted a shift into this block, use it. |
| 2143 |
BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; |
2143 |
BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; |
| 2144 |
|
2144 |
|
| 2145 |
if (!InsertedShift) { |
2145 |
if (!InsertedShift) { |
| 2146 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
2146 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
| 2147 |
assert(InsertPt != UserBB->end()); |
2147 |
assert(InsertPt != UserBB->end()); |
| 2148 |
|
2148 |
|
| 2149 |
if (ShiftI->getOpcode() == Instruction::AShr) |
2149 |
if (ShiftI->getOpcode() == Instruction::AShr) |
| 2150 |
InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, |
2150 |
InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, |
| 2151 |
"", &*InsertPt); |
2151 |
"", &*InsertPt); |
| 2152 |
else |
2152 |
else |
| 2153 |
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, |
2153 |
InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, |
| 2154 |
"", &*InsertPt); |
2154 |
"", &*InsertPt); |
| 2155 |
InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); |
2155 |
InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); |
| 2156 |
|
2156 |
|
| 2157 |
MadeChange = true; |
2157 |
MadeChange = true; |
| 2158 |
} |
2158 |
} |
| 2159 |
|
2159 |
|
| 2160 |
// Replace a use of the shift with a use of the new shift. |
2160 |
// Replace a use of the shift with a use of the new shift. |
| 2161 |
TheUse = InsertedShift; |
2161 |
TheUse = InsertedShift; |
| 2162 |
} |
2162 |
} |
| 2163 |
|
2163 |
|
| 2164 |
// If we removed all uses, or there are none, nuke the shift. |
2164 |
// If we removed all uses, or there are none, nuke the shift. |
| 2165 |
if (ShiftI->use_empty()) { |
2165 |
if (ShiftI->use_empty()) { |
| 2166 |
salvageDebugInfo(*ShiftI); |
2166 |
salvageDebugInfo(*ShiftI); |
| 2167 |
ShiftI->eraseFromParent(); |
2167 |
ShiftI->eraseFromParent(); |
| 2168 |
MadeChange = true; |
2168 |
MadeChange = true; |
| 2169 |
} |
2169 |
} |
| 2170 |
|
2170 |
|
| 2171 |
return MadeChange; |
2171 |
return MadeChange; |
| 2172 |
} |
2172 |
} |
| 2173 |
|
2173 |
|
| 2174 |
/// If counting leading or trailing zeros is an expensive operation and a zero |
2174 |
/// If counting leading or trailing zeros is an expensive operation and a zero |
| 2175 |
/// input is defined, add a check for zero to avoid calling the intrinsic. |
2175 |
/// input is defined, add a check for zero to avoid calling the intrinsic. |
| 2176 |
/// |
2176 |
/// |
| 2177 |
/// We want to transform: |
2177 |
/// We want to transform: |
| 2178 |
/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) |
2178 |
/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) |
| 2179 |
/// |
2179 |
/// |
| 2180 |
/// into: |
2180 |
/// into: |
| 2181 |
/// entry: |
2181 |
/// entry: |
| 2182 |
/// %cmpz = icmp eq i64 %A, 0 |
2182 |
/// %cmpz = icmp eq i64 %A, 0 |
| 2183 |
/// br i1 %cmpz, label %cond.end, label %cond.false |
2183 |
/// br i1 %cmpz, label %cond.end, label %cond.false |
| 2184 |
/// cond.false: |
2184 |
/// cond.false: |
| 2185 |
/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) |
2185 |
/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) |
| 2186 |
/// br label %cond.end |
2186 |
/// br label %cond.end |
| 2187 |
/// cond.end: |
2187 |
/// cond.end: |
| 2188 |
/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] |
2188 |
/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] |
| 2189 |
/// |
2189 |
/// |
| 2190 |
/// If the transform is performed, return true and set ModifiedDT to true. |
2190 |
/// If the transform is performed, return true and set ModifiedDT to true. |
| 2191 |
static bool despeculateCountZeros(IntrinsicInst *CountZeros, |
2191 |
static bool despeculateCountZeros(IntrinsicInst *CountZeros, |
| 2192 |
LoopInfo &LI, |
2192 |
LoopInfo &LI, |
| 2193 |
const TargetLowering *TLI, |
2193 |
const TargetLowering *TLI, |
| 2194 |
const DataLayout *DL, ModifyDT &ModifiedDT, |
2194 |
const DataLayout *DL, ModifyDT &ModifiedDT, |
| 2195 |
SmallSet &FreshBBs, |
2195 |
SmallSet &FreshBBs, |
| 2196 |
bool IsHugeFunc) { |
2196 |
bool IsHugeFunc) { |
| 2197 |
// If a zero input is undefined, it doesn't make sense to despeculate that. |
2197 |
// If a zero input is undefined, it doesn't make sense to despeculate that. |
| 2198 |
if (match(CountZeros->getOperand(1), m_One())) |
2198 |
if (match(CountZeros->getOperand(1), m_One())) |
| 2199 |
return false; |
2199 |
return false; |
| 2200 |
|
2200 |
|
| 2201 |
// If it's cheap to speculate, there's nothing to do. |
2201 |
// If it's cheap to speculate, there's nothing to do. |
| 2202 |
Type *Ty = CountZeros->getType(); |
2202 |
Type *Ty = CountZeros->getType(); |
| 2203 |
auto IntrinsicID = CountZeros->getIntrinsicID(); |
2203 |
auto IntrinsicID = CountZeros->getIntrinsicID(); |
| 2204 |
if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) || |
2204 |
if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) || |
| 2205 |
(IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty))) |
2205 |
(IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty))) |
| 2206 |
return false; |
2206 |
return false; |
| 2207 |
|
2207 |
|
| 2208 |
// Only handle legal scalar cases. Anything else requires too much work. |
2208 |
// Only handle legal scalar cases. Anything else requires too much work. |
| 2209 |
unsigned SizeInBits = Ty->getScalarSizeInBits(); |
2209 |
unsigned SizeInBits = Ty->getScalarSizeInBits(); |
| 2210 |
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) |
2210 |
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) |
| 2211 |
return false; |
2211 |
return false; |
| 2212 |
|
2212 |
|
| 2213 |
// Bail if the value is never zero. |
2213 |
// Bail if the value is never zero. |
| 2214 |
Use &Op = CountZeros->getOperandUse(0); |
2214 |
Use &Op = CountZeros->getOperandUse(0); |
| 2215 |
if (isKnownNonZero(Op, *DL)) |
2215 |
if (isKnownNonZero(Op, *DL)) |
| 2216 |
return false; |
2216 |
return false; |
| 2217 |
|
2217 |
|
| 2218 |
// The intrinsic will be sunk behind a compare against zero and branch. |
2218 |
// The intrinsic will be sunk behind a compare against zero and branch. |
| 2219 |
BasicBlock *StartBlock = CountZeros->getParent(); |
2219 |
BasicBlock *StartBlock = CountZeros->getParent(); |
| 2220 |
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); |
2220 |
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); |
| 2221 |
if (IsHugeFunc) |
2221 |
if (IsHugeFunc) |
| 2222 |
FreshBBs.insert(CallBlock); |
2222 |
FreshBBs.insert(CallBlock); |
| 2223 |
|
2223 |
|
| 2224 |
// Create another block after the count zero intrinsic. A PHI will be added |
2224 |
// Create another block after the count zero intrinsic. A PHI will be added |
| 2225 |
// in this block to select the result of the intrinsic or the bit-width |
2225 |
// in this block to select the result of the intrinsic or the bit-width |
| 2226 |
// constant if the input to the intrinsic is zero. |
2226 |
// constant if the input to the intrinsic is zero. |
| 2227 |
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); |
2227 |
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); |
| 2228 |
BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); |
2228 |
BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); |
| 2229 |
if (IsHugeFunc) |
2229 |
if (IsHugeFunc) |
| 2230 |
FreshBBs.insert(EndBlock); |
2230 |
FreshBBs.insert(EndBlock); |
| 2231 |
|
2231 |
|
| 2232 |
// Update the LoopInfo. The new blocks are in the same loop as the start |
2232 |
// Update the LoopInfo. The new blocks are in the same loop as the start |
| 2233 |
// block. |
2233 |
// block. |
| 2234 |
if (Loop *L = LI.getLoopFor(StartBlock)) { |
2234 |
if (Loop *L = LI.getLoopFor(StartBlock)) { |
| 2235 |
L->addBasicBlockToLoop(CallBlock, LI); |
2235 |
L->addBasicBlockToLoop(CallBlock, LI); |
| 2236 |
L->addBasicBlockToLoop(EndBlock, LI); |
2236 |
L->addBasicBlockToLoop(EndBlock, LI); |
| 2237 |
} |
2237 |
} |
| 2238 |
|
2238 |
|
| 2239 |
// Set up a builder to create a compare, conditional branch, and PHI. |
2239 |
// Set up a builder to create a compare, conditional branch, and PHI. |
| 2240 |
IRBuilder<> Builder(CountZeros->getContext()); |
2240 |
IRBuilder<> Builder(CountZeros->getContext()); |
| 2241 |
Builder.SetInsertPoint(StartBlock->getTerminator()); |
2241 |
Builder.SetInsertPoint(StartBlock->getTerminator()); |
| 2242 |
Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); |
2242 |
Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); |
| 2243 |
|
2243 |
|
| 2244 |
// Replace the unconditional branch that was created by the first split with |
2244 |
// Replace the unconditional branch that was created by the first split with |
| 2245 |
// a compare against zero and a conditional branch. |
2245 |
// a compare against zero and a conditional branch. |
| 2246 |
Value *Zero = Constant::getNullValue(Ty); |
2246 |
Value *Zero = Constant::getNullValue(Ty); |
| 2247 |
// Avoid introducing branch on poison. This also replaces the ctz operand. |
2247 |
// Avoid introducing branch on poison. This also replaces the ctz operand. |
| 2248 |
if (!isGuaranteedNotToBeUndefOrPoison(Op)) |
2248 |
if (!isGuaranteedNotToBeUndefOrPoison(Op)) |
| 2249 |
Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); |
2249 |
Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); |
| 2250 |
Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz"); |
2250 |
Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz"); |
| 2251 |
Builder.CreateCondBr(Cmp, EndBlock, CallBlock); |
2251 |
Builder.CreateCondBr(Cmp, EndBlock, CallBlock); |
| 2252 |
StartBlock->getTerminator()->eraseFromParent(); |
2252 |
StartBlock->getTerminator()->eraseFromParent(); |
| 2253 |
|
2253 |
|
| 2254 |
// Create a PHI in the end block to select either the output of the intrinsic |
2254 |
// Create a PHI in the end block to select either the output of the intrinsic |
| 2255 |
// or the bit width of the operand. |
2255 |
// or the bit width of the operand. |
| 2256 |
Builder.SetInsertPoint(&EndBlock->front()); |
2256 |
Builder.SetInsertPoint(&EndBlock->front()); |
| 2257 |
PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); |
2257 |
PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); |
| 2258 |
replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc); |
2258 |
replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc); |
| 2259 |
Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); |
2259 |
Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); |
| 2260 |
PN->addIncoming(BitWidth, StartBlock); |
2260 |
PN->addIncoming(BitWidth, StartBlock); |
| 2261 |
PN->addIncoming(CountZeros, CallBlock); |
2261 |
PN->addIncoming(CountZeros, CallBlock); |
| 2262 |
|
2262 |
|
| 2263 |
// We are explicitly handling the zero case, so we can set the intrinsic's |
2263 |
// We are explicitly handling the zero case, so we can set the intrinsic's |
| 2264 |
// undefined zero argument to 'true'. This will also prevent reprocessing the |
2264 |
// undefined zero argument to 'true'. This will also prevent reprocessing the |
| 2265 |
// intrinsic; we only despeculate when a zero input is defined. |
2265 |
// intrinsic; we only despeculate when a zero input is defined. |
| 2266 |
CountZeros->setArgOperand(1, Builder.getTrue()); |
2266 |
CountZeros->setArgOperand(1, Builder.getTrue()); |
| 2267 |
ModifiedDT = ModifyDT::ModifyBBDT; |
2267 |
ModifiedDT = ModifyDT::ModifyBBDT; |
| 2268 |
return true; |
2268 |
return true; |
| 2269 |
} |
2269 |
} |
| 2270 |
|
2270 |
|
| 2271 |
bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { |
2271 |
bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { |
| 2272 |
BasicBlock *BB = CI->getParent(); |
2272 |
BasicBlock *BB = CI->getParent(); |
| 2273 |
|
2273 |
|
| 2274 |
// Lower inline assembly if we can. |
2274 |
// Lower inline assembly if we can. |
| 2275 |
// If we found an inline asm expession, and if the target knows how to |
2275 |
// If we found an inline asm expession, and if the target knows how to |
| 2276 |
// lower it to normal LLVM code, do so now. |
2276 |
// lower it to normal LLVM code, do so now. |
| 2277 |
if (CI->isInlineAsm()) { |
2277 |
if (CI->isInlineAsm()) { |
| 2278 |
if (TLI->ExpandInlineAsm(CI)) { |
2278 |
if (TLI->ExpandInlineAsm(CI)) { |
| 2279 |
// Avoid invalidating the iterator. |
2279 |
// Avoid invalidating the iterator. |
| 2280 |
CurInstIterator = BB->begin(); |
2280 |
CurInstIterator = BB->begin(); |
| 2281 |
// Avoid processing instructions out of order, which could cause |
2281 |
// Avoid processing instructions out of order, which could cause |
| 2282 |
// reuse before a value is defined. |
2282 |
// reuse before a value is defined. |
| 2283 |
SunkAddrs.clear(); |
2283 |
SunkAddrs.clear(); |
| 2284 |
return true; |
2284 |
return true; |
| 2285 |
} |
2285 |
} |
| 2286 |
// Sink address computing for memory operands into the block. |
2286 |
// Sink address computing for memory operands into the block. |
| 2287 |
if (optimizeInlineAsmInst(CI)) |
2287 |
if (optimizeInlineAsmInst(CI)) |
| 2288 |
return true; |
2288 |
return true; |
| 2289 |
} |
2289 |
} |
| 2290 |
|
2290 |
|
| 2291 |
// Align the pointer arguments to this call if the target thinks it's a good |
2291 |
// Align the pointer arguments to this call if the target thinks it's a good |
| 2292 |
// idea |
2292 |
// idea |
| 2293 |
unsigned MinSize; |
2293 |
unsigned MinSize; |
| 2294 |
Align PrefAlign; |
2294 |
Align PrefAlign; |
| 2295 |
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { |
2295 |
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { |
| 2296 |
for (auto &Arg : CI->args()) { |
2296 |
for (auto &Arg : CI->args()) { |
| 2297 |
// We want to align both objects whose address is used directly and |
2297 |
// We want to align both objects whose address is used directly and |
| 2298 |
// objects whose address is used in casts and GEPs, though it only makes |
2298 |
// objects whose address is used in casts and GEPs, though it only makes |
| 2299 |
// sense for GEPs if the offset is a multiple of the desired alignment and |
2299 |
// sense for GEPs if the offset is a multiple of the desired alignment and |
| 2300 |
// if size - offset meets the size threshold. |
2300 |
// if size - offset meets the size threshold. |
| 2301 |
if (!Arg->getType()->isPointerTy()) |
2301 |
if (!Arg->getType()->isPointerTy()) |
| 2302 |
continue; |
2302 |
continue; |
| 2303 |
APInt Offset(DL->getIndexSizeInBits( |
2303 |
APInt Offset(DL->getIndexSizeInBits( |
| 2304 |
cast(Arg->getType())->getAddressSpace()), |
2304 |
cast(Arg->getType())->getAddressSpace()), |
| 2305 |
0); |
2305 |
0); |
| 2306 |
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); |
2306 |
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); |
| 2307 |
uint64_t Offset2 = Offset.getLimitedValue(); |
2307 |
uint64_t Offset2 = Offset.getLimitedValue(); |
| 2308 |
if (!isAligned(PrefAlign, Offset2)) |
2308 |
if (!isAligned(PrefAlign, Offset2)) |
| 2309 |
continue; |
2309 |
continue; |
| 2310 |
AllocaInst *AI; |
2310 |
AllocaInst *AI; |
| 2311 |
if ((AI = dyn_cast(Val)) && AI->getAlign() < PrefAlign && |
2311 |
if ((AI = dyn_cast(Val)) && AI->getAlign() < PrefAlign && |
| 2312 |
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) |
2312 |
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) |
| 2313 |
AI->setAlignment(PrefAlign); |
2313 |
AI->setAlignment(PrefAlign); |
| 2314 |
// Global variables can only be aligned if they are defined in this |
2314 |
// Global variables can only be aligned if they are defined in this |
| 2315 |
// object (i.e. they are uniquely initialized in this object), and |
2315 |
// object (i.e. they are uniquely initialized in this object), and |
| 2316 |
// over-aligning global variables that have an explicit section is |
2316 |
// over-aligning global variables that have an explicit section is |
| 2317 |
// forbidden. |
2317 |
// forbidden. |
| 2318 |
GlobalVariable *GV; |
2318 |
GlobalVariable *GV; |
| 2319 |
if ((GV = dyn_cast(Val)) && GV->canIncreaseAlignment() && |
2319 |
if ((GV = dyn_cast(Val)) && GV->canIncreaseAlignment() && |
| 2320 |
GV->getPointerAlignment(*DL) < PrefAlign && |
2320 |
GV->getPointerAlignment(*DL) < PrefAlign && |
| 2321 |
DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) |
2321 |
DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) |
| 2322 |
GV->setAlignment(PrefAlign); |
2322 |
GV->setAlignment(PrefAlign); |
| 2323 |
} |
2323 |
} |
| 2324 |
} |
2324 |
} |
| 2325 |
// If this is a memcpy (or similar) then we may be able to improve the |
2325 |
// If this is a memcpy (or similar) then we may be able to improve the |
| 2326 |
// alignment. |
2326 |
// alignment. |
| 2327 |
if (MemIntrinsic *MI = dyn_cast(CI)) { |
2327 |
if (MemIntrinsic *MI = dyn_cast(CI)) { |
| 2328 |
Align DestAlign = getKnownAlignment(MI->getDest(), *DL); |
2328 |
Align DestAlign = getKnownAlignment(MI->getDest(), *DL); |
| 2329 |
MaybeAlign MIDestAlign = MI->getDestAlign(); |
2329 |
MaybeAlign MIDestAlign = MI->getDestAlign(); |
| 2330 |
if (!MIDestAlign || DestAlign > *MIDestAlign) |
2330 |
if (!MIDestAlign || DestAlign > *MIDestAlign) |
| 2331 |
MI->setDestAlignment(DestAlign); |
2331 |
MI->setDestAlignment(DestAlign); |
| 2332 |
if (MemTransferInst *MTI = dyn_cast(MI)) { |
2332 |
if (MemTransferInst *MTI = dyn_cast(MI)) { |
| 2333 |
MaybeAlign MTISrcAlign = MTI->getSourceAlign(); |
2333 |
MaybeAlign MTISrcAlign = MTI->getSourceAlign(); |
| 2334 |
Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL); |
2334 |
Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL); |
| 2335 |
if (!MTISrcAlign || SrcAlign > *MTISrcAlign) |
2335 |
if (!MTISrcAlign || SrcAlign > *MTISrcAlign) |
| 2336 |
MTI->setSourceAlignment(SrcAlign); |
2336 |
MTI->setSourceAlignment(SrcAlign); |
| 2337 |
} |
2337 |
} |
| 2338 |
} |
2338 |
} |
| 2339 |
|
2339 |
|
| 2340 |
// If we have a cold call site, try to sink addressing computation into the |
2340 |
// If we have a cold call site, try to sink addressing computation into the |
| 2341 |
// cold block. This interacts with our handling for loads and stores to |
2341 |
// cold block. This interacts with our handling for loads and stores to |
| 2342 |
// ensure that we can fold all uses of a potential addressing computation |
2342 |
// ensure that we can fold all uses of a potential addressing computation |
| 2343 |
// into their uses. TODO: generalize this to work over profiling data |
2343 |
// into their uses. TODO: generalize this to work over profiling data |
| 2344 |
if (CI->hasFnAttr(Attribute::Cold) && !OptSize && |
2344 |
if (CI->hasFnAttr(Attribute::Cold) && !OptSize && |
| 2345 |
!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) |
2345 |
!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) |
| 2346 |
for (auto &Arg : CI->args()) { |
2346 |
for (auto &Arg : CI->args()) { |
| 2347 |
if (!Arg->getType()->isPointerTy()) |
2347 |
if (!Arg->getType()->isPointerTy()) |
| 2348 |
continue; |
2348 |
continue; |
| 2349 |
unsigned AS = Arg->getType()->getPointerAddressSpace(); |
2349 |
unsigned AS = Arg->getType()->getPointerAddressSpace(); |
| 2350 |
if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS)) |
2350 |
if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS)) |
| 2351 |
return true; |
2351 |
return true; |
| 2352 |
} |
2352 |
} |
| 2353 |
|
2353 |
|
| 2354 |
IntrinsicInst *II = dyn_cast(CI); |
2354 |
IntrinsicInst *II = dyn_cast(CI); |
| 2355 |
if (II) { |
2355 |
if (II) { |
| 2356 |
switch (II->getIntrinsicID()) { |
2356 |
switch (II->getIntrinsicID()) { |
| 2357 |
default: |
2357 |
default: |
| 2358 |
break; |
2358 |
break; |
| 2359 |
case Intrinsic::assume: |
2359 |
case Intrinsic::assume: |
| 2360 |
llvm_unreachable("llvm.assume should have been removed already"); |
2360 |
llvm_unreachable("llvm.assume should have been removed already"); |
| 2361 |
case Intrinsic::experimental_widenable_condition: { |
2361 |
case Intrinsic::experimental_widenable_condition: { |
| 2362 |
// Give up on future widening oppurtunties so that we can fold away dead |
2362 |
// Give up on future widening oppurtunties so that we can fold away dead |
| 2363 |
// paths and merge blocks before going into block-local instruction |
2363 |
// paths and merge blocks before going into block-local instruction |
| 2364 |
// selection. |
2364 |
// selection. |
| 2365 |
if (II->use_empty()) { |
2365 |
if (II->use_empty()) { |
| 2366 |
II->eraseFromParent(); |
2366 |
II->eraseFromParent(); |
| 2367 |
return true; |
2367 |
return true; |
| 2368 |
} |
2368 |
} |
| 2369 |
Constant *RetVal = ConstantInt::getTrue(II->getContext()); |
2369 |
Constant *RetVal = ConstantInt::getTrue(II->getContext()); |
| 2370 |
resetIteratorIfInvalidatedWhileCalling(BB, [&]() { |
2370 |
resetIteratorIfInvalidatedWhileCalling(BB, [&]() { |
| 2371 |
replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); |
2371 |
replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); |
| 2372 |
}); |
2372 |
}); |
| 2373 |
return true; |
2373 |
return true; |
| 2374 |
} |
2374 |
} |
| 2375 |
case Intrinsic::objectsize: |
2375 |
case Intrinsic::objectsize: |
| 2376 |
llvm_unreachable("llvm.objectsize.* should have been lowered already"); |
2376 |
llvm_unreachable("llvm.objectsize.* should have been lowered already"); |
| 2377 |
case Intrinsic::is_constant: |
2377 |
case Intrinsic::is_constant: |
| 2378 |
llvm_unreachable("llvm.is.constant.* should have been lowered already"); |
2378 |
llvm_unreachable("llvm.is.constant.* should have been lowered already"); |
| 2379 |
case Intrinsic::aarch64_stlxr: |
2379 |
case Intrinsic::aarch64_stlxr: |
| 2380 |
case Intrinsic::aarch64_stxr: { |
2380 |
case Intrinsic::aarch64_stxr: { |
| 2381 |
ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); |
2381 |
ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); |
| 2382 |
if (!ExtVal || !ExtVal->hasOneUse() || |
2382 |
if (!ExtVal || !ExtVal->hasOneUse() || |
| 2383 |
ExtVal->getParent() == CI->getParent()) |
2383 |
ExtVal->getParent() == CI->getParent()) |
| 2384 |
return false; |
2384 |
return false; |
| 2385 |
// Sink a zext feeding stlxr/stxr before it, so it can be folded into it. |
2385 |
// Sink a zext feeding stlxr/stxr before it, so it can be folded into it. |
| 2386 |
ExtVal->moveBefore(CI); |
2386 |
ExtVal->moveBefore(CI); |
| 2387 |
// Mark this instruction as "inserted by CGP", so that other |
2387 |
// Mark this instruction as "inserted by CGP", so that other |
| 2388 |
// optimizations don't touch it. |
2388 |
// optimizations don't touch it. |
| 2389 |
InsertedInsts.insert(ExtVal); |
2389 |
InsertedInsts.insert(ExtVal); |
| 2390 |
return true; |
2390 |
return true; |
| 2391 |
} |
2391 |
} |
| 2392 |
|
2392 |
|
| 2393 |
case Intrinsic::launder_invariant_group: |
2393 |
case Intrinsic::launder_invariant_group: |
| 2394 |
case Intrinsic::strip_invariant_group: { |
2394 |
case Intrinsic::strip_invariant_group: { |
| 2395 |
Value *ArgVal = II->getArgOperand(0); |
2395 |
Value *ArgVal = II->getArgOperand(0); |
| 2396 |
auto it = LargeOffsetGEPMap.find(II); |
2396 |
auto it = LargeOffsetGEPMap.find(II); |
| 2397 |
if (it != LargeOffsetGEPMap.end()) { |
2397 |
if (it != LargeOffsetGEPMap.end()) { |
| 2398 |
// Merge entries in LargeOffsetGEPMap to reflect the RAUW. |
2398 |
// Merge entries in LargeOffsetGEPMap to reflect the RAUW. |
| 2399 |
// Make sure not to have to deal with iterator invalidation |
2399 |
// Make sure not to have to deal with iterator invalidation |
| 2400 |
// after possibly adding ArgVal to LargeOffsetGEPMap. |
2400 |
// after possibly adding ArgVal to LargeOffsetGEPMap. |
| 2401 |
auto GEPs = std::move(it->second); |
2401 |
auto GEPs = std::move(it->second); |
| 2402 |
LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); |
2402 |
LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); |
| 2403 |
LargeOffsetGEPMap.erase(II); |
2403 |
LargeOffsetGEPMap.erase(II); |
| 2404 |
} |
2404 |
} |
| 2405 |
|
2405 |
|
| 2406 |
replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc); |
2406 |
replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc); |
| 2407 |
II->eraseFromParent(); |
2407 |
II->eraseFromParent(); |
| 2408 |
return true; |
2408 |
return true; |
| 2409 |
} |
2409 |
} |
| 2410 |
case Intrinsic::cttz: |
2410 |
case Intrinsic::cttz: |
| 2411 |
case Intrinsic::ctlz: |
2411 |
case Intrinsic::ctlz: |
| 2412 |
// If counting zeros is expensive, try to avoid it. |
2412 |
// If counting zeros is expensive, try to avoid it. |
| 2413 |
return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs, |
2413 |
return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs, |
| 2414 |
IsHugeFunc); |
2414 |
IsHugeFunc); |
| 2415 |
case Intrinsic::fshl: |
2415 |
case Intrinsic::fshl: |
| 2416 |
case Intrinsic::fshr: |
2416 |
case Intrinsic::fshr: |
| 2417 |
return optimizeFunnelShift(II); |
2417 |
return optimizeFunnelShift(II); |
| 2418 |
case Intrinsic::dbg_assign: |
2418 |
case Intrinsic::dbg_assign: |
| 2419 |
case Intrinsic::dbg_value: |
2419 |
case Intrinsic::dbg_value: |
| 2420 |
return fixupDbgValue(II); |
2420 |
return fixupDbgValue(II); |
| 2421 |
case Intrinsic::masked_gather: |
2421 |
case Intrinsic::masked_gather: |
| 2422 |
return optimizeGatherScatterInst(II, II->getArgOperand(0)); |
2422 |
return optimizeGatherScatterInst(II, II->getArgOperand(0)); |
| 2423 |
case Intrinsic::masked_scatter: |
2423 |
case Intrinsic::masked_scatter: |
| 2424 |
return optimizeGatherScatterInst(II, II->getArgOperand(1)); |
2424 |
return optimizeGatherScatterInst(II, II->getArgOperand(1)); |
| 2425 |
} |
2425 |
} |
| 2426 |
|
2426 |
|
| 2427 |
SmallVector PtrOps; |
2427 |
SmallVector PtrOps; |
| 2428 |
Type *AccessTy; |
2428 |
Type *AccessTy; |
| 2429 |
if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) |
2429 |
if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) |
| 2430 |
while (!PtrOps.empty()) { |
2430 |
while (!PtrOps.empty()) { |
| 2431 |
Value *PtrVal = PtrOps.pop_back_val(); |
2431 |
Value *PtrVal = PtrOps.pop_back_val(); |
| 2432 |
unsigned AS = PtrVal->getType()->getPointerAddressSpace(); |
2432 |
unsigned AS = PtrVal->getType()->getPointerAddressSpace(); |
| 2433 |
if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) |
2433 |
if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) |
| 2434 |
return true; |
2434 |
return true; |
| 2435 |
} |
2435 |
} |
| 2436 |
} |
2436 |
} |
| 2437 |
|
2437 |
|
| 2438 |
// From here on out we're working with named functions. |
2438 |
// From here on out we're working with named functions. |
| 2439 |
if (!CI->getCalledFunction()) |
2439 |
if (!CI->getCalledFunction()) |
| 2440 |
return false; |
2440 |
return false; |
| 2441 |
|
2441 |
|
| 2442 |
// Lower all default uses of _chk calls. This is very similar |
2442 |
// Lower all default uses of _chk calls. This is very similar |
| 2443 |
// to what InstCombineCalls does, but here we are only lowering calls |
2443 |
// to what InstCombineCalls does, but here we are only lowering calls |
| 2444 |
// to fortified library functions (e.g. __memcpy_chk) that have the default |
2444 |
// to fortified library functions (e.g. __memcpy_chk) that have the default |
| 2445 |
// "don't know" as the objectsize. Anything else should be left alone. |
2445 |
// "don't know" as the objectsize. Anything else should be left alone. |
| 2446 |
FortifiedLibCallSimplifier Simplifier(TLInfo, true); |
2446 |
FortifiedLibCallSimplifier Simplifier(TLInfo, true); |
| 2447 |
IRBuilder<> Builder(CI); |
2447 |
IRBuilder<> Builder(CI); |
| 2448 |
if (Value *V = Simplifier.optimizeCall(CI, Builder)) { |
2448 |
if (Value *V = Simplifier.optimizeCall(CI, Builder)) { |
| 2449 |
replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc); |
2449 |
replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc); |
| 2450 |
CI->eraseFromParent(); |
2450 |
CI->eraseFromParent(); |
| 2451 |
return true; |
2451 |
return true; |
| 2452 |
} |
2452 |
} |
| 2453 |
|
2453 |
|
| 2454 |
return false; |
2454 |
return false; |
| 2455 |
} |
2455 |
} |
| 2456 |
|
2456 |
|
| 2457 |
/// Look for opportunities to duplicate return instructions to the predecessor |
2457 |
/// Look for opportunities to duplicate return instructions to the predecessor |
| 2458 |
/// to enable tail call optimizations. The case it is currently looking for is: |
2458 |
/// to enable tail call optimizations. The case it is currently looking for is: |
| 2459 |
/// @code |
2459 |
/// @code |
| 2460 |
/// bb0: |
2460 |
/// bb0: |
| 2461 |
/// %tmp0 = tail call i32 @f0() |
2461 |
/// %tmp0 = tail call i32 @f0() |
| 2462 |
/// br label %return |
2462 |
/// br label %return |
| 2463 |
/// bb1: |
2463 |
/// bb1: |
| 2464 |
/// %tmp1 = tail call i32 @f1() |
2464 |
/// %tmp1 = tail call i32 @f1() |
| 2465 |
/// br label %return |
2465 |
/// br label %return |
| 2466 |
/// bb2: |
2466 |
/// bb2: |
| 2467 |
/// %tmp2 = tail call i32 @f2() |
2467 |
/// %tmp2 = tail call i32 @f2() |
| 2468 |
/// br label %return |
2468 |
/// br label %return |
| 2469 |
/// return: |
2469 |
/// return: |
| 2470 |
/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] |
2470 |
/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] |
| 2471 |
/// ret i32 %retval |
2471 |
/// ret i32 %retval |
| 2472 |
/// @endcode |
2472 |
/// @endcode |
| 2473 |
/// |
2473 |
/// |
| 2474 |
/// => |
2474 |
/// => |
| 2475 |
/// |
2475 |
/// |
| 2476 |
/// @code |
2476 |
/// @code |
| 2477 |
/// bb0: |
2477 |
/// bb0: |
| 2478 |
/// %tmp0 = tail call i32 @f0() |
2478 |
/// %tmp0 = tail call i32 @f0() |
| 2479 |
/// ret i32 %tmp0 |
2479 |
/// ret i32 %tmp0 |
| 2480 |
/// bb1: |
2480 |
/// bb1: |
| 2481 |
/// %tmp1 = tail call i32 @f1() |
2481 |
/// %tmp1 = tail call i32 @f1() |
| 2482 |
/// ret i32 %tmp1 |
2482 |
/// ret i32 %tmp1 |
| 2483 |
/// bb2: |
2483 |
/// bb2: |
| 2484 |
/// %tmp2 = tail call i32 @f2() |
2484 |
/// %tmp2 = tail call i32 @f2() |
| 2485 |
/// ret i32 %tmp2 |
2485 |
/// ret i32 %tmp2 |
| 2486 |
/// @endcode |
2486 |
/// @endcode |
| 2487 |
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, |
2487 |
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, |
| 2488 |
ModifyDT &ModifiedDT) { |
2488 |
ModifyDT &ModifiedDT) { |
| 2489 |
if (!BB->getTerminator()) |
2489 |
if (!BB->getTerminator()) |
| 2490 |
return false; |
2490 |
return false; |
| 2491 |
|
2491 |
|
| 2492 |
ReturnInst *RetI = dyn_cast(BB->getTerminator()); |
2492 |
ReturnInst *RetI = dyn_cast(BB->getTerminator()); |
| 2493 |
if (!RetI) |
2493 |
if (!RetI) |
| 2494 |
return false; |
2494 |
return false; |
| 2495 |
|
2495 |
|
| 2496 |
assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop"); |
2496 |
assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop"); |
| 2497 |
|
2497 |
|
| 2498 |
PHINode *PN = nullptr; |
2498 |
PHINode *PN = nullptr; |
| 2499 |
ExtractValueInst *EVI = nullptr; |
2499 |
ExtractValueInst *EVI = nullptr; |
| 2500 |
BitCastInst *BCI = nullptr; |
2500 |
BitCastInst *BCI = nullptr; |
| 2501 |
Value *V = RetI->getReturnValue(); |
2501 |
Value *V = RetI->getReturnValue(); |
| 2502 |
if (V) { |
2502 |
if (V) { |
| 2503 |
BCI = dyn_cast(V); |
2503 |
BCI = dyn_cast(V); |
| 2504 |
if (BCI) |
2504 |
if (BCI) |
| 2505 |
V = BCI->getOperand(0); |
2505 |
V = BCI->getOperand(0); |
| 2506 |
|
2506 |
|
| 2507 |
EVI = dyn_cast(V); |
2507 |
EVI = dyn_cast(V); |
| 2508 |
if (EVI) { |
2508 |
if (EVI) { |
| 2509 |
V = EVI->getOperand(0); |
2509 |
V = EVI->getOperand(0); |
| 2510 |
if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; })) |
2510 |
if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; })) |
| 2511 |
return false; |
2511 |
return false; |
| 2512 |
} |
2512 |
} |
| 2513 |
|
2513 |
|
| 2514 |
PN = dyn_cast(V); |
2514 |
PN = dyn_cast(V); |
| 2515 |
if (!PN) |
2515 |
if (!PN) |
| 2516 |
return false; |
2516 |
return false; |
| 2517 |
} |
2517 |
} |
| 2518 |
|
2518 |
|
| 2519 |
if (PN && PN->getParent() != BB) |
2519 |
if (PN && PN->getParent() != BB) |
| 2520 |
return false; |
2520 |
return false; |
| 2521 |
|
2521 |
|
| 2522 |
auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) { |
2522 |
auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) { |
| 2523 |
const BitCastInst *BC = dyn_cast(Inst); |
2523 |
const BitCastInst *BC = dyn_cast(Inst); |
| 2524 |
if (BC && BC->hasOneUse()) |
2524 |
if (BC && BC->hasOneUse()) |
| 2525 |
Inst = BC->user_back(); |
2525 |
Inst = BC->user_back(); |
| 2526 |
|
2526 |
|
| 2527 |
if (const IntrinsicInst *II = dyn_cast(Inst)) |
2527 |
if (const IntrinsicInst *II = dyn_cast(Inst)) |
| 2528 |
return II->getIntrinsicID() == Intrinsic::lifetime_end; |
2528 |
return II->getIntrinsicID() == Intrinsic::lifetime_end; |
| 2529 |
return false; |
2529 |
return false; |
| 2530 |
}; |
2530 |
}; |
| 2531 |
|
2531 |
|
| 2532 |
// Make sure there are no instructions between the first instruction |
2532 |
// Make sure there are no instructions between the first instruction |
| 2533 |
// and return. |
2533 |
// and return. |
| 2534 |
const Instruction *BI = BB->getFirstNonPHI(); |
2534 |
const Instruction *BI = BB->getFirstNonPHI(); |
| 2535 |
// Skip over debug and the bitcast. |
2535 |
// Skip over debug and the bitcast. |
| 2536 |
while (isa(BI) || BI == BCI || BI == EVI || |
2536 |
while (isa(BI) || BI == BCI || BI == EVI || |
| 2537 |
isa(BI) || isLifetimeEndOrBitCastFor(BI)) |
2537 |
isa(BI) || isLifetimeEndOrBitCastFor(BI)) |
| 2538 |
BI = BI->getNextNode(); |
2538 |
BI = BI->getNextNode(); |
| 2539 |
if (BI != RetI) |
2539 |
if (BI != RetI) |
| 2540 |
return false; |
2540 |
return false; |
| 2541 |
|
2541 |
|
| 2542 |
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail |
2542 |
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail |
| 2543 |
/// call. |
2543 |
/// call. |
| 2544 |
const Function *F = BB->getParent(); |
2544 |
const Function *F = BB->getParent(); |
| 2545 |
SmallVector TailCallBBs; |
2545 |
SmallVector TailCallBBs; |
| 2546 |
if (PN) { |
2546 |
if (PN) { |
| 2547 |
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { |
2547 |
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { |
| 2548 |
// Look through bitcasts. |
2548 |
// Look through bitcasts. |
| 2549 |
Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); |
2549 |
Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); |
| 2550 |
CallInst *CI = dyn_cast(IncomingVal); |
2550 |
CallInst *CI = dyn_cast(IncomingVal); |
| 2551 |
BasicBlock *PredBB = PN->getIncomingBlock(I); |
2551 |
BasicBlock *PredBB = PN->getIncomingBlock(I); |
| 2552 |
// Make sure the phi value is indeed produced by the tail call. |
2552 |
// Make sure the phi value is indeed produced by the tail call. |
| 2553 |
if (CI && CI->hasOneUse() && CI->getParent() == PredBB && |
2553 |
if (CI && CI->hasOneUse() && CI->getParent() == PredBB && |
| 2554 |
TLI->mayBeEmittedAsTailCall(CI) && |
2554 |
TLI->mayBeEmittedAsTailCall(CI) && |
| 2555 |
attributesPermitTailCall(F, CI, RetI, *TLI)) |
2555 |
attributesPermitTailCall(F, CI, RetI, *TLI)) |
| 2556 |
TailCallBBs.push_back(PredBB); |
2556 |
TailCallBBs.push_back(PredBB); |
| 2557 |
} |
2557 |
} |
| 2558 |
} else { |
2558 |
} else { |
| 2559 |
SmallPtrSet VisitedBBs; |
2559 |
SmallPtrSet VisitedBBs; |
| 2560 |
for (BasicBlock *Pred : predecessors(BB)) { |
2560 |
for (BasicBlock *Pred : predecessors(BB)) { |
| 2561 |
if (!VisitedBBs.insert(Pred).second) |
2561 |
if (!VisitedBBs.insert(Pred).second) |
| 2562 |
continue; |
2562 |
continue; |
| 2563 |
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) { |
2563 |
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) { |
| 2564 |
CallInst *CI = dyn_cast(I); |
2564 |
CallInst *CI = dyn_cast(I); |
| 2565 |
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && |
2565 |
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && |
| 2566 |
attributesPermitTailCall(F, CI, RetI, *TLI)) |
2566 |
attributesPermitTailCall(F, CI, RetI, *TLI)) |
| 2567 |
TailCallBBs.push_back(Pred); |
2567 |
TailCallBBs.push_back(Pred); |
| 2568 |
} |
2568 |
} |
| 2569 |
} |
2569 |
} |
| 2570 |
} |
2570 |
} |
| 2571 |
|
2571 |
|
| 2572 |
bool Changed = false; |
2572 |
bool Changed = false; |
| 2573 |
for (auto const &TailCallBB : TailCallBBs) { |
2573 |
for (auto const &TailCallBB : TailCallBBs) { |
| 2574 |
// Make sure the call instruction is followed by an unconditional branch to |
2574 |
// Make sure the call instruction is followed by an unconditional branch to |
| 2575 |
// the return block. |
2575 |
// the return block. |
| 2576 |
BranchInst *BI = dyn_cast(TailCallBB->getTerminator()); |
2576 |
BranchInst *BI = dyn_cast(TailCallBB->getTerminator()); |
| 2577 |
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) |
2577 |
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) |
| 2578 |
continue; |
2578 |
continue; |
| 2579 |
|
2579 |
|
| 2580 |
// Duplicate the return into TailCallBB. |
2580 |
// Duplicate the return into TailCallBB. |
| 2581 |
(void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); |
2581 |
(void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); |
| 2582 |
assert(!VerifyBFIUpdates || |
2582 |
assert(!VerifyBFIUpdates || |
| 2583 |
BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB)); |
2583 |
BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB)); |
| 2584 |
BFI->setBlockFreq( |
2584 |
BFI->setBlockFreq( |
| 2585 |
BB, |
2585 |
BB, |
| 2586 |
(BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency()); |
2586 |
(BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)).getFrequency()); |
| 2587 |
ModifiedDT = ModifyDT::ModifyBBDT; |
2587 |
ModifiedDT = ModifyDT::ModifyBBDT; |
| 2588 |
Changed = true; |
2588 |
Changed = true; |
| 2589 |
++NumRetsDup; |
2589 |
++NumRetsDup; |
| 2590 |
} |
2590 |
} |
| 2591 |
|
2591 |
|
| 2592 |
// If we eliminated all predecessors of the block, delete the block now. |
2592 |
// If we eliminated all predecessors of the block, delete the block now. |
| 2593 |
if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) |
2593 |
if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) |
| 2594 |
BB->eraseFromParent(); |
2594 |
BB->eraseFromParent(); |
| 2595 |
|
2595 |
|
| 2596 |
return Changed; |
2596 |
return Changed; |
| 2597 |
} |
2597 |
} |
| 2598 |
|
2598 |
|
| 2599 |
//===----------------------------------------------------------------------===// |
2599 |
//===----------------------------------------------------------------------===// |
| 2600 |
// Memory Optimization |
2600 |
// Memory Optimization |
| 2601 |
//===----------------------------------------------------------------------===// |
2601 |
//===----------------------------------------------------------------------===// |
| 2602 |
|
2602 |
|
| 2603 |
namespace { |
2603 |
namespace { |
| 2604 |
|
2604 |
|
| 2605 |
/// This is an extended version of TargetLowering::AddrMode |
2605 |
/// This is an extended version of TargetLowering::AddrMode |
| 2606 |
/// which holds actual Value*'s for register values. |
2606 |
/// which holds actual Value*'s for register values. |
| 2607 |
struct ExtAddrMode : public TargetLowering::AddrMode { |
2607 |
struct ExtAddrMode : public TargetLowering::AddrMode { |
| 2608 |
Value *BaseReg = nullptr; |
2608 |
Value *BaseReg = nullptr; |
| 2609 |
Value *ScaledReg = nullptr; |
2609 |
Value *ScaledReg = nullptr; |
| 2610 |
Value *OriginalValue = nullptr; |
2610 |
Value *OriginalValue = nullptr; |
| 2611 |
bool InBounds = true; |
2611 |
bool InBounds = true; |
| 2612 |
|
2612 |
|
| 2613 |
enum FieldName { |
2613 |
enum FieldName { |
| 2614 |
NoField = 0x00, |
2614 |
NoField = 0x00, |
| 2615 |
BaseRegField = 0x01, |
2615 |
BaseRegField = 0x01, |
| 2616 |
BaseGVField = 0x02, |
2616 |
BaseGVField = 0x02, |
| 2617 |
BaseOffsField = 0x04, |
2617 |
BaseOffsField = 0x04, |
| 2618 |
ScaledRegField = 0x08, |
2618 |
ScaledRegField = 0x08, |
| 2619 |
ScaleField = 0x10, |
2619 |
ScaleField = 0x10, |
| 2620 |
MultipleFields = 0xff |
2620 |
MultipleFields = 0xff |
| 2621 |
}; |
2621 |
}; |
| 2622 |
|
2622 |
|
| 2623 |
ExtAddrMode() = default; |
2623 |
ExtAddrMode() = default; |
| 2624 |
|
2624 |
|
| 2625 |
void print(raw_ostream &OS) const; |
2625 |
void print(raw_ostream &OS) const; |
| 2626 |
void dump() const; |
2626 |
void dump() const; |
| 2627 |
|
2627 |
|
| 2628 |
FieldName compare(const ExtAddrMode &other) { |
2628 |
FieldName compare(const ExtAddrMode &other) { |
| 2629 |
// First check that the types are the same on each field, as differing types |
2629 |
// First check that the types are the same on each field, as differing types |
| 2630 |
// is something we can't cope with later on. |
2630 |
// is something we can't cope with later on. |
| 2631 |
if (BaseReg && other.BaseReg && |
2631 |
if (BaseReg && other.BaseReg && |
| 2632 |
BaseReg->getType() != other.BaseReg->getType()) |
2632 |
BaseReg->getType() != other.BaseReg->getType()) |
| 2633 |
return MultipleFields; |
2633 |
return MultipleFields; |
| 2634 |
if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType()) |
2634 |
if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType()) |
| 2635 |
return MultipleFields; |
2635 |
return MultipleFields; |
| 2636 |
if (ScaledReg && other.ScaledReg && |
2636 |
if (ScaledReg && other.ScaledReg && |
| 2637 |
ScaledReg->getType() != other.ScaledReg->getType()) |
2637 |
ScaledReg->getType() != other.ScaledReg->getType()) |
| 2638 |
return MultipleFields; |
2638 |
return MultipleFields; |
| 2639 |
|
2639 |
|
| 2640 |
// Conservatively reject 'inbounds' mismatches. |
2640 |
// Conservatively reject 'inbounds' mismatches. |
| 2641 |
if (InBounds != other.InBounds) |
2641 |
if (InBounds != other.InBounds) |
| 2642 |
return MultipleFields; |
2642 |
return MultipleFields; |
| 2643 |
|
2643 |
|
| 2644 |
// Check each field to see if it differs. |
2644 |
// Check each field to see if it differs. |
| 2645 |
unsigned Result = NoField; |
2645 |
unsigned Result = NoField; |
| 2646 |
if (BaseReg != other.BaseReg) |
2646 |
if (BaseReg != other.BaseReg) |
| 2647 |
Result |= BaseRegField; |
2647 |
Result |= BaseRegField; |
| 2648 |
if (BaseGV != other.BaseGV) |
2648 |
if (BaseGV != other.BaseGV) |
| 2649 |
Result |= BaseGVField; |
2649 |
Result |= BaseGVField; |
| 2650 |
if (BaseOffs != other.BaseOffs) |
2650 |
if (BaseOffs != other.BaseOffs) |
| 2651 |
Result |= BaseOffsField; |
2651 |
Result |= BaseOffsField; |
| 2652 |
if (ScaledReg != other.ScaledReg) |
2652 |
if (ScaledReg != other.ScaledReg) |
| 2653 |
Result |= ScaledRegField; |
2653 |
Result |= ScaledRegField; |
| 2654 |
// Don't count 0 as being a different scale, because that actually means |
2654 |
// Don't count 0 as being a different scale, because that actually means |
| 2655 |
// unscaled (which will already be counted by having no ScaledReg). |
2655 |
// unscaled (which will already be counted by having no ScaledReg). |
| 2656 |
if (Scale && other.Scale && Scale != other.Scale) |
2656 |
if (Scale && other.Scale && Scale != other.Scale) |
| 2657 |
Result |= ScaleField; |
2657 |
Result |= ScaleField; |
| 2658 |
|
2658 |
|
| 2659 |
if (llvm::popcount(Result) > 1) |
2659 |
if (llvm::popcount(Result) > 1) |
| 2660 |
return MultipleFields; |
2660 |
return MultipleFields; |
| 2661 |
else |
2661 |
else |
| 2662 |
return static_cast(Result); |
2662 |
return static_cast(Result); |
| 2663 |
} |
2663 |
} |
| 2664 |
|
2664 |
|
| 2665 |
// An AddrMode is trivial if it involves no calculation i.e. it is just a base |
2665 |
// An AddrMode is trivial if it involves no calculation i.e. it is just a base |
| 2666 |
// with no offset. |
2666 |
// with no offset. |
| 2667 |
bool isTrivial() { |
2667 |
bool isTrivial() { |
| 2668 |
// An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is |
2668 |
// An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is |
| 2669 |
// trivial if at most one of these terms is nonzero, except that BaseGV and |
2669 |
// trivial if at most one of these terms is nonzero, except that BaseGV and |
| 2670 |
// BaseReg both being zero actually means a null pointer value, which we |
2670 |
// BaseReg both being zero actually means a null pointer value, which we |
| 2671 |
// consider to be 'non-zero' here. |
2671 |
// consider to be 'non-zero' here. |
| 2672 |
return !BaseOffs && !Scale && !(BaseGV && BaseReg); |
2672 |
return !BaseOffs && !Scale && !(BaseGV && BaseReg); |
| 2673 |
} |
2673 |
} |
| 2674 |
|
2674 |
|
| 2675 |
Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) { |
2675 |
Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) { |
| 2676 |
switch (Field) { |
2676 |
switch (Field) { |
| 2677 |
default: |
2677 |
default: |
| 2678 |
return nullptr; |
2678 |
return nullptr; |
| 2679 |
case BaseRegField: |
2679 |
case BaseRegField: |
| 2680 |
return BaseReg; |
2680 |
return BaseReg; |
| 2681 |
case BaseGVField: |
2681 |
case BaseGVField: |
| 2682 |
return BaseGV; |
2682 |
return BaseGV; |
| 2683 |
case ScaledRegField: |
2683 |
case ScaledRegField: |
| 2684 |
return ScaledReg; |
2684 |
return ScaledReg; |
| 2685 |
case BaseOffsField: |
2685 |
case BaseOffsField: |
| 2686 |
return ConstantInt::get(IntPtrTy, BaseOffs); |
2686 |
return ConstantInt::get(IntPtrTy, BaseOffs); |
| 2687 |
} |
2687 |
} |
| 2688 |
} |
2688 |
} |
| 2689 |
|
2689 |
|
| 2690 |
void SetCombinedField(FieldName Field, Value *V, |
2690 |
void SetCombinedField(FieldName Field, Value *V, |
| 2691 |
const SmallVectorImpl &AddrModes) { |
2691 |
const SmallVectorImpl &AddrModes) { |
| 2692 |
switch (Field) { |
2692 |
switch (Field) { |
| 2693 |
default: |
2693 |
default: |
| 2694 |
llvm_unreachable("Unhandled fields are expected to be rejected earlier"); |
2694 |
llvm_unreachable("Unhandled fields are expected to be rejected earlier"); |
| 2695 |
break; |
2695 |
break; |
| 2696 |
case ExtAddrMode::BaseRegField: |
2696 |
case ExtAddrMode::BaseRegField: |
| 2697 |
BaseReg = V; |
2697 |
BaseReg = V; |
| 2698 |
break; |
2698 |
break; |
| 2699 |
case ExtAddrMode::BaseGVField: |
2699 |
case ExtAddrMode::BaseGVField: |
| 2700 |
// A combined BaseGV is an Instruction, not a GlobalValue, so it goes |
2700 |
// A combined BaseGV is an Instruction, not a GlobalValue, so it goes |
| 2701 |
// in the BaseReg field. |
2701 |
// in the BaseReg field. |
| 2702 |
assert(BaseReg == nullptr); |
2702 |
assert(BaseReg == nullptr); |
| 2703 |
BaseReg = V; |
2703 |
BaseReg = V; |
| 2704 |
BaseGV = nullptr; |
2704 |
BaseGV = nullptr; |
| 2705 |
break; |
2705 |
break; |
| 2706 |
case ExtAddrMode::ScaledRegField: |
2706 |
case ExtAddrMode::ScaledRegField: |
| 2707 |
ScaledReg = V; |
2707 |
ScaledReg = V; |
| 2708 |
// If we have a mix of scaled and unscaled addrmodes then we want scale |
2708 |
// If we have a mix of scaled and unscaled addrmodes then we want scale |
| 2709 |
// to be the scale and not zero. |
2709 |
// to be the scale and not zero. |
| 2710 |
if (!Scale) |
2710 |
if (!Scale) |
| 2711 |
for (const ExtAddrMode &AM : AddrModes) |
2711 |
for (const ExtAddrMode &AM : AddrModes) |
| 2712 |
if (AM.Scale) { |
2712 |
if (AM.Scale) { |
| 2713 |
Scale = AM.Scale; |
2713 |
Scale = AM.Scale; |
| 2714 |
break; |
2714 |
break; |
| 2715 |
} |
2715 |
} |
| 2716 |
break; |
2716 |
break; |
| 2717 |
case ExtAddrMode::BaseOffsField: |
2717 |
case ExtAddrMode::BaseOffsField: |
| 2718 |
// The offset is no longer a constant, so it goes in ScaledReg with a |
2718 |
// The offset is no longer a constant, so it goes in ScaledReg with a |
| 2719 |
// scale of 1. |
2719 |
// scale of 1. |
| 2720 |
assert(ScaledReg == nullptr); |
2720 |
assert(ScaledReg == nullptr); |
| 2721 |
ScaledReg = V; |
2721 |
ScaledReg = V; |
| 2722 |
Scale = 1; |
2722 |
Scale = 1; |
| 2723 |
BaseOffs = 0; |
2723 |
BaseOffs = 0; |
| 2724 |
break; |
2724 |
break; |
| 2725 |
} |
2725 |
} |
| 2726 |
} |
2726 |
} |
| 2727 |
}; |
2727 |
}; |
| 2728 |
|
2728 |
|
| 2729 |
#ifndef NDEBUG |
2729 |
#ifndef NDEBUG |
| 2730 |
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { |
2730 |
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { |
| 2731 |
AM.print(OS); |
2731 |
AM.print(OS); |
| 2732 |
return OS; |
2732 |
return OS; |
| 2733 |
} |
2733 |
} |
| 2734 |
#endif |
2734 |
#endif |
| 2735 |
|
2735 |
|
| 2736 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2736 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 2737 |
void ExtAddrMode::print(raw_ostream &OS) const { |
2737 |
void ExtAddrMode::print(raw_ostream &OS) const { |
| 2738 |
bool NeedPlus = false; |
2738 |
bool NeedPlus = false; |
| 2739 |
OS << "["; |
2739 |
OS << "["; |
| 2740 |
if (InBounds) |
2740 |
if (InBounds) |
| 2741 |
OS << "inbounds "; |
2741 |
OS << "inbounds "; |
| 2742 |
if (BaseGV) { |
2742 |
if (BaseGV) { |
| 2743 |
OS << "GV:"; |
2743 |
OS << "GV:"; |
| 2744 |
BaseGV->printAsOperand(OS, /*PrintType=*/false); |
2744 |
BaseGV->printAsOperand(OS, /*PrintType=*/false); |
| 2745 |
NeedPlus = true; |
2745 |
NeedPlus = true; |
| 2746 |
} |
2746 |
} |
| 2747 |
|
2747 |
|
| 2748 |
if (BaseOffs) { |
2748 |
if (BaseOffs) { |
| 2749 |
OS << (NeedPlus ? " + " : "") << BaseOffs; |
2749 |
OS << (NeedPlus ? " + " : "") << BaseOffs; |
| 2750 |
NeedPlus = true; |
2750 |
NeedPlus = true; |
| 2751 |
} |
2751 |
} |
| 2752 |
|
2752 |
|
| 2753 |
if (BaseReg) { |
2753 |
if (BaseReg) { |
| 2754 |
OS << (NeedPlus ? " + " : "") << "Base:"; |
2754 |
OS << (NeedPlus ? " + " : "") << "Base:"; |
| 2755 |
BaseReg->printAsOperand(OS, /*PrintType=*/false); |
2755 |
BaseReg->printAsOperand(OS, /*PrintType=*/false); |
| 2756 |
NeedPlus = true; |
2756 |
NeedPlus = true; |
| 2757 |
} |
2757 |
} |
| 2758 |
if (Scale) { |
2758 |
if (Scale) { |
| 2759 |
OS << (NeedPlus ? " + " : "") << Scale << "*"; |
2759 |
OS << (NeedPlus ? " + " : "") << Scale << "*"; |
| 2760 |
ScaledReg->printAsOperand(OS, /*PrintType=*/false); |
2760 |
ScaledReg->printAsOperand(OS, /*PrintType=*/false); |
| 2761 |
} |
2761 |
} |
| 2762 |
|
2762 |
|
| 2763 |
OS << ']'; |
2763 |
OS << ']'; |
| 2764 |
} |
2764 |
} |
| 2765 |
|
2765 |
|
| 2766 |
LLVM_DUMP_METHOD void ExtAddrMode::dump() const { |
2766 |
LLVM_DUMP_METHOD void ExtAddrMode::dump() const { |
| 2767 |
print(dbgs()); |
2767 |
print(dbgs()); |
| 2768 |
dbgs() << '\n'; |
2768 |
dbgs() << '\n'; |
| 2769 |
} |
2769 |
} |
| 2770 |
#endif |
2770 |
#endif |
| 2771 |
|
2771 |
|
| 2772 |
} // end anonymous namespace |
2772 |
} // end anonymous namespace |
| 2773 |
|
2773 |
|
| 2774 |
namespace { |
2774 |
namespace { |
| 2775 |
|
2775 |
|
| 2776 |
/// This class provides transaction based operation on the IR. |
2776 |
/// This class provides transaction based operation on the IR. |
| 2777 |
/// Every change made through this class is recorded in the internal state and |
2777 |
/// Every change made through this class is recorded in the internal state and |
| 2778 |
/// can be undone (rollback) until commit is called. |
2778 |
/// can be undone (rollback) until commit is called. |
| 2779 |
/// CGP does not check if instructions could be speculatively executed when |
2779 |
/// CGP does not check if instructions could be speculatively executed when |
| 2780 |
/// moved. Preserving the original location would pessimize the debugging |
2780 |
/// moved. Preserving the original location would pessimize the debugging |
| 2781 |
/// experience, as well as negatively impact the quality of sample PGO. |
2781 |
/// experience, as well as negatively impact the quality of sample PGO. |
| 2782 |
class TypePromotionTransaction { |
2782 |
class TypePromotionTransaction { |
| 2783 |
/// This represents the common interface of the individual transaction. |
2783 |
/// This represents the common interface of the individual transaction. |
| 2784 |
/// Each class implements the logic for doing one specific modification on |
2784 |
/// Each class implements the logic for doing one specific modification on |
| 2785 |
/// the IR via the TypePromotionTransaction. |
2785 |
/// the IR via the TypePromotionTransaction. |
| 2786 |
class TypePromotionAction { |
2786 |
class TypePromotionAction { |
| 2787 |
protected: |
2787 |
protected: |
| 2788 |
/// The Instruction modified. |
2788 |
/// The Instruction modified. |
| 2789 |
Instruction *Inst; |
2789 |
Instruction *Inst; |
| 2790 |
|
2790 |
|
| 2791 |
public: |
2791 |
public: |
| 2792 |
/// Constructor of the action. |
2792 |
/// Constructor of the action. |
| 2793 |
/// The constructor performs the related action on the IR. |
2793 |
/// The constructor performs the related action on the IR. |
| 2794 |
TypePromotionAction(Instruction *Inst) : Inst(Inst) {} |
2794 |
TypePromotionAction(Instruction *Inst) : Inst(Inst) {} |
| 2795 |
|
2795 |
|
| 2796 |
virtual ~TypePromotionAction() = default; |
2796 |
virtual ~TypePromotionAction() = default; |
| 2797 |
|
2797 |
|
| 2798 |
/// Undo the modification done by this action. |
2798 |
/// Undo the modification done by this action. |
| 2799 |
/// When this method is called, the IR must be in the same state as it was |
2799 |
/// When this method is called, the IR must be in the same state as it was |
| 2800 |
/// before this action was applied. |
2800 |
/// before this action was applied. |
| 2801 |
/// \pre Undoing the action works if and only if the IR is in the exact same |
2801 |
/// \pre Undoing the action works if and only if the IR is in the exact same |
| 2802 |
/// state as it was directly after this action was applied. |
2802 |
/// state as it was directly after this action was applied. |
| 2803 |
virtual void undo() = 0; |
2803 |
virtual void undo() = 0; |
| 2804 |
|
2804 |
|
| 2805 |
/// Advocate every change made by this action. |
2805 |
/// Advocate every change made by this action. |
| 2806 |
/// When the results on the IR of the action are to be kept, it is important |
2806 |
/// When the results on the IR of the action are to be kept, it is important |
| 2807 |
/// to call this function, otherwise hidden information may be kept forever. |
2807 |
/// to call this function, otherwise hidden information may be kept forever. |
| 2808 |
virtual void commit() { |
2808 |
virtual void commit() { |
| 2809 |
// Nothing to be done, this action is not doing anything. |
2809 |
// Nothing to be done, this action is not doing anything. |
| 2810 |
} |
2810 |
} |
| 2811 |
}; |
2811 |
}; |
| 2812 |
|
2812 |
|
| 2813 |
/// Utility to remember the position of an instruction. |
2813 |
/// Utility to remember the position of an instruction. |
| 2814 |
class InsertionHandler { |
2814 |
class InsertionHandler { |
| 2815 |
/// Position of an instruction. |
2815 |
/// Position of an instruction. |
| 2816 |
/// Either an instruction: |
2816 |
/// Either an instruction: |
| 2817 |
/// - Is the first in a basic block: BB is used. |
2817 |
/// - Is the first in a basic block: BB is used. |
| 2818 |
/// - Has a previous instruction: PrevInst is used. |
2818 |
/// - Has a previous instruction: PrevInst is used. |
| 2819 |
union { |
2819 |
union { |
| 2820 |
Instruction *PrevInst; |
2820 |
Instruction *PrevInst; |
| 2821 |
BasicBlock *BB; |
2821 |
BasicBlock *BB; |
| 2822 |
} Point; |
2822 |
} Point; |
| 2823 |
|
2823 |
|
| 2824 |
/// Remember whether or not the instruction had a previous instruction. |
2824 |
/// Remember whether or not the instruction had a previous instruction. |
| 2825 |
bool HasPrevInstruction; |
2825 |
bool HasPrevInstruction; |
| 2826 |
|
2826 |
|
| 2827 |
public: |
2827 |
public: |
| 2828 |
/// Record the position of \p Inst. |
2828 |
/// Record the position of \p Inst. |
| 2829 |
InsertionHandler(Instruction *Inst) { |
2829 |
InsertionHandler(Instruction *Inst) { |
| 2830 |
BasicBlock::iterator It = Inst->getIterator(); |
2830 |
BasicBlock::iterator It = Inst->getIterator(); |
| 2831 |
HasPrevInstruction = (It != (Inst->getParent()->begin())); |
2831 |
HasPrevInstruction = (It != (Inst->getParent()->begin())); |
| 2832 |
if (HasPrevInstruction) |
2832 |
if (HasPrevInstruction) |
| 2833 |
Point.PrevInst = &*--It; |
2833 |
Point.PrevInst = &*--It; |
| 2834 |
else |
2834 |
else |
| 2835 |
Point.BB = Inst->getParent(); |
2835 |
Point.BB = Inst->getParent(); |
| 2836 |
} |
2836 |
} |
| 2837 |
|
2837 |
|
| 2838 |
/// Insert \p Inst at the recorded position. |
2838 |
/// Insert \p Inst at the recorded position. |
| 2839 |
void insert(Instruction *Inst) { |
2839 |
void insert(Instruction *Inst) { |
| 2840 |
if (HasPrevInstruction) { |
2840 |
if (HasPrevInstruction) { |
| 2841 |
if (Inst->getParent()) |
2841 |
if (Inst->getParent()) |
| 2842 |
Inst->removeFromParent(); |
2842 |
Inst->removeFromParent(); |
| 2843 |
Inst->insertAfter(Point.PrevInst); |
2843 |
Inst->insertAfter(Point.PrevInst); |
| 2844 |
} else { |
2844 |
} else { |
| 2845 |
Instruction *Position = &*Point.BB->getFirstInsertionPt(); |
2845 |
Instruction *Position = &*Point.BB->getFirstInsertionPt(); |
| 2846 |
if (Inst->getParent()) |
2846 |
if (Inst->getParent()) |
| 2847 |
Inst->moveBefore(Position); |
2847 |
Inst->moveBefore(Position); |
| 2848 |
else |
2848 |
else |
| 2849 |
Inst->insertBefore(Position); |
2849 |
Inst->insertBefore(Position); |
| 2850 |
} |
2850 |
} |
| 2851 |
} |
2851 |
} |
| 2852 |
}; |
2852 |
}; |
| 2853 |
|
2853 |
|
| 2854 |
/// Move an instruction before another. |
2854 |
/// Move an instruction before another. |
| 2855 |
class InstructionMoveBefore : public TypePromotionAction { |
2855 |
class InstructionMoveBefore : public TypePromotionAction { |
| 2856 |
/// Original position of the instruction. |
2856 |
/// Original position of the instruction. |
| 2857 |
InsertionHandler Position; |
2857 |
InsertionHandler Position; |
| 2858 |
|
2858 |
|
| 2859 |
public: |
2859 |
public: |
| 2860 |
/// Move \p Inst before \p Before. |
2860 |
/// Move \p Inst before \p Before. |
| 2861 |
InstructionMoveBefore(Instruction *Inst, Instruction *Before) |
2861 |
InstructionMoveBefore(Instruction *Inst, Instruction *Before) |
| 2862 |
: TypePromotionAction(Inst), Position(Inst) { |
2862 |
: TypePromotionAction(Inst), Position(Inst) { |
| 2863 |
LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before |
2863 |
LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before |
| 2864 |
<< "\n"); |
2864 |
<< "\n"); |
| 2865 |
Inst->moveBefore(Before); |
2865 |
Inst->moveBefore(Before); |
| 2866 |
} |
2866 |
} |
| 2867 |
|
2867 |
|
| 2868 |
/// Move the instruction back to its original position. |
2868 |
/// Move the instruction back to its original position. |
| 2869 |
void undo() override { |
2869 |
void undo() override { |
| 2870 |
LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); |
2870 |
LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); |
| 2871 |
Position.insert(Inst); |
2871 |
Position.insert(Inst); |
| 2872 |
} |
2872 |
} |
| 2873 |
}; |
2873 |
}; |
| 2874 |
|
2874 |
|
| 2875 |
/// Set the operand of an instruction with a new value. |
2875 |
/// Set the operand of an instruction with a new value. |
| 2876 |
class OperandSetter : public TypePromotionAction { |
2876 |
class OperandSetter : public TypePromotionAction { |
| 2877 |
/// Original operand of the instruction. |
2877 |
/// Original operand of the instruction. |
| 2878 |
Value *Origin; |
2878 |
Value *Origin; |
| 2879 |
|
2879 |
|
| 2880 |
/// Index of the modified instruction. |
2880 |
/// Index of the modified instruction. |
| 2881 |
unsigned Idx; |
2881 |
unsigned Idx; |
| 2882 |
|
2882 |
|
| 2883 |
public: |
2883 |
public: |
| 2884 |
/// Set \p Idx operand of \p Inst with \p NewVal. |
2884 |
/// Set \p Idx operand of \p Inst with \p NewVal. |
| 2885 |
OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) |
2885 |
OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) |
| 2886 |
: TypePromotionAction(Inst), Idx(Idx) { |
2886 |
: TypePromotionAction(Inst), Idx(Idx) { |
| 2887 |
LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" |
2887 |
LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" |
| 2888 |
<< "for:" << *Inst << "\n" |
2888 |
<< "for:" << *Inst << "\n" |
| 2889 |
<< "with:" << *NewVal << "\n"); |
2889 |
<< "with:" << *NewVal << "\n"); |
| 2890 |
Origin = Inst->getOperand(Idx); |
2890 |
Origin = Inst->getOperand(Idx); |
| 2891 |
Inst->setOperand(Idx, NewVal); |
2891 |
Inst->setOperand(Idx, NewVal); |
| 2892 |
} |
2892 |
} |
| 2893 |
|
2893 |
|
| 2894 |
/// Restore the original value of the instruction. |
2894 |
/// Restore the original value of the instruction. |
| 2895 |
void undo() override { |
2895 |
void undo() override { |
| 2896 |
LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" |
2896 |
LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" |
| 2897 |
<< "for: " << *Inst << "\n" |
2897 |
<< "for: " << *Inst << "\n" |
| 2898 |
<< "with: " << *Origin << "\n"); |
2898 |
<< "with: " << *Origin << "\n"); |
| 2899 |
Inst->setOperand(Idx, Origin); |
2899 |
Inst->setOperand(Idx, Origin); |
| 2900 |
} |
2900 |
} |
| 2901 |
}; |
2901 |
}; |
| 2902 |
|
2902 |
|
| 2903 |
/// Hide the operands of an instruction. |
2903 |
/// Hide the operands of an instruction. |
| 2904 |
/// Do as if this instruction was not using any of its operands. |
2904 |
/// Do as if this instruction was not using any of its operands. |
| 2905 |
class OperandsHider : public TypePromotionAction { |
2905 |
class OperandsHider : public TypePromotionAction { |
| 2906 |
/// The list of original operands. |
2906 |
/// The list of original operands. |
| 2907 |
SmallVector OriginalValues; |
2907 |
SmallVector OriginalValues; |
| 2908 |
|
2908 |
|
| 2909 |
public: |
2909 |
public: |
| 2910 |
/// Remove \p Inst from the uses of the operands of \p Inst. |
2910 |
/// Remove \p Inst from the uses of the operands of \p Inst. |
| 2911 |
OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { |
2911 |
OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { |
| 2912 |
LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); |
2912 |
LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); |
| 2913 |
unsigned NumOpnds = Inst->getNumOperands(); |
2913 |
unsigned NumOpnds = Inst->getNumOperands(); |
| 2914 |
OriginalValues.reserve(NumOpnds); |
2914 |
OriginalValues.reserve(NumOpnds); |
| 2915 |
for (unsigned It = 0; It < NumOpnds; ++It) { |
2915 |
for (unsigned It = 0; It < NumOpnds; ++It) { |
| 2916 |
// Save the current operand. |
2916 |
// Save the current operand. |
| 2917 |
Value *Val = Inst->getOperand(It); |
2917 |
Value *Val = Inst->getOperand(It); |
| 2918 |
OriginalValues.push_back(Val); |
2918 |
OriginalValues.push_back(Val); |
| 2919 |
// Set a dummy one. |
2919 |
// Set a dummy one. |
| 2920 |
// We could use OperandSetter here, but that would imply an overhead |
2920 |
// We could use OperandSetter here, but that would imply an overhead |
| 2921 |
// that we are not willing to pay. |
2921 |
// that we are not willing to pay. |
| 2922 |
Inst->setOperand(It, UndefValue::get(Val->getType())); |
2922 |
Inst->setOperand(It, UndefValue::get(Val->getType())); |
| 2923 |
} |
2923 |
} |
| 2924 |
} |
2924 |
} |
| 2925 |
|
2925 |
|
| 2926 |
/// Restore the original list of uses. |
2926 |
/// Restore the original list of uses. |
| 2927 |
void undo() override { |
2927 |
void undo() override { |
| 2928 |
LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); |
2928 |
LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); |
| 2929 |
for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) |
2929 |
for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) |
| 2930 |
Inst->setOperand(It, OriginalValues[It]); |
2930 |
Inst->setOperand(It, OriginalValues[It]); |
| 2931 |
} |
2931 |
} |
| 2932 |
}; |
2932 |
}; |
| 2933 |
|
2933 |
|
| 2934 |
/// Build a truncate instruction. |
2934 |
/// Build a truncate instruction. |
| 2935 |
class TruncBuilder : public TypePromotionAction { |
2935 |
class TruncBuilder : public TypePromotionAction { |
| 2936 |
Value *Val; |
2936 |
Value *Val; |
| 2937 |
|
2937 |
|
| 2938 |
public: |
2938 |
public: |
| 2939 |
/// Build a truncate instruction of \p Opnd producing a \p Ty |
2939 |
/// Build a truncate instruction of \p Opnd producing a \p Ty |
| 2940 |
/// result. |
2940 |
/// result. |
| 2941 |
/// trunc Opnd to Ty. |
2941 |
/// trunc Opnd to Ty. |
| 2942 |
TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { |
2942 |
TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { |
| 2943 |
IRBuilder<> Builder(Opnd); |
2943 |
IRBuilder<> Builder(Opnd); |
| 2944 |
Builder.SetCurrentDebugLocation(DebugLoc()); |
2944 |
Builder.SetCurrentDebugLocation(DebugLoc()); |
| 2945 |
Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); |
2945 |
Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); |
| 2946 |
LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); |
2946 |
LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); |
| 2947 |
} |
2947 |
} |
| 2948 |
|
2948 |
|
| 2949 |
/// Get the built value. |
2949 |
/// Get the built value. |
| 2950 |
Value *getBuiltValue() { return Val; } |
2950 |
Value *getBuiltValue() { return Val; } |
| 2951 |
|
2951 |
|
| 2952 |
/// Remove the built instruction. |
2952 |
/// Remove the built instruction. |
| 2953 |
void undo() override { |
2953 |
void undo() override { |
| 2954 |
LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); |
2954 |
LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); |
| 2955 |
if (Instruction *IVal = dyn_cast(Val)) |
2955 |
if (Instruction *IVal = dyn_cast(Val)) |
| 2956 |
IVal->eraseFromParent(); |
2956 |
IVal->eraseFromParent(); |
| 2957 |
} |
2957 |
} |
| 2958 |
}; |
2958 |
}; |
| 2959 |
|
2959 |
|
| 2960 |
/// Build a sign extension instruction. |
2960 |
/// Build a sign extension instruction. |
| 2961 |
class SExtBuilder : public TypePromotionAction { |
2961 |
class SExtBuilder : public TypePromotionAction { |
| 2962 |
Value *Val; |
2962 |
Value *Val; |
| 2963 |
|
2963 |
|
| 2964 |
public: |
2964 |
public: |
| 2965 |
/// Build a sign extension instruction of \p Opnd producing a \p Ty |
2965 |
/// Build a sign extension instruction of \p Opnd producing a \p Ty |
| 2966 |
/// result. |
2966 |
/// result. |
| 2967 |
/// sext Opnd to Ty. |
2967 |
/// sext Opnd to Ty. |
| 2968 |
SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) |
2968 |
SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) |
| 2969 |
: TypePromotionAction(InsertPt) { |
2969 |
: TypePromotionAction(InsertPt) { |
| 2970 |
IRBuilder<> Builder(InsertPt); |
2970 |
IRBuilder<> Builder(InsertPt); |
| 2971 |
Val = Builder.CreateSExt(Opnd, Ty, "promoted"); |
2971 |
Val = Builder.CreateSExt(Opnd, Ty, "promoted"); |
| 2972 |
LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); |
2972 |
LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); |
| 2973 |
} |
2973 |
} |
| 2974 |
|
2974 |
|
| 2975 |
/// Get the built value. |
2975 |
/// Get the built value. |
| 2976 |
Value *getBuiltValue() { return Val; } |
2976 |
Value *getBuiltValue() { return Val; } |
| 2977 |
|
2977 |
|
| 2978 |
/// Remove the built instruction. |
2978 |
/// Remove the built instruction. |
| 2979 |
void undo() override { |
2979 |
void undo() override { |
| 2980 |
LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); |
2980 |
LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); |
| 2981 |
if (Instruction *IVal = dyn_cast(Val)) |
2981 |
if (Instruction *IVal = dyn_cast(Val)) |
| 2982 |
IVal->eraseFromParent(); |
2982 |
IVal->eraseFromParent(); |
| 2983 |
} |
2983 |
} |
| 2984 |
}; |
2984 |
}; |
| 2985 |
|
2985 |
|
| 2986 |
/// Build a zero extension instruction. |
2986 |
/// Build a zero extension instruction. |
| 2987 |
class ZExtBuilder : public TypePromotionAction { |
2987 |
class ZExtBuilder : public TypePromotionAction { |
| 2988 |
Value *Val; |
2988 |
Value *Val; |
| 2989 |
|
2989 |
|
| 2990 |
public: |
2990 |
public: |
| 2991 |
/// Build a zero extension instruction of \p Opnd producing a \p Ty |
2991 |
/// Build a zero extension instruction of \p Opnd producing a \p Ty |
| 2992 |
/// result. |
2992 |
/// result. |
| 2993 |
/// zext Opnd to Ty. |
2993 |
/// zext Opnd to Ty. |
| 2994 |
ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) |
2994 |
ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) |
| 2995 |
: TypePromotionAction(InsertPt) { |
2995 |
: TypePromotionAction(InsertPt) { |
| 2996 |
IRBuilder<> Builder(InsertPt); |
2996 |
IRBuilder<> Builder(InsertPt); |
| 2997 |
Builder.SetCurrentDebugLocation(DebugLoc()); |
2997 |
Builder.SetCurrentDebugLocation(DebugLoc()); |
| 2998 |
Val = Builder.CreateZExt(Opnd, Ty, "promoted"); |
2998 |
Val = Builder.CreateZExt(Opnd, Ty, "promoted"); |
| 2999 |
LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); |
2999 |
LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); |
| 3000 |
} |
3000 |
} |
| 3001 |
|
3001 |
|
| 3002 |
/// Get the built value. |
3002 |
/// Get the built value. |
| 3003 |
Value *getBuiltValue() { return Val; } |
3003 |
Value *getBuiltValue() { return Val; } |
| 3004 |
|
3004 |
|
| 3005 |
/// Remove the built instruction. |
3005 |
/// Remove the built instruction. |
| 3006 |
void undo() override { |
3006 |
void undo() override { |
| 3007 |
LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); |
3007 |
LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); |
| 3008 |
if (Instruction *IVal = dyn_cast(Val)) |
3008 |
if (Instruction *IVal = dyn_cast(Val)) |
| 3009 |
IVal->eraseFromParent(); |
3009 |
IVal->eraseFromParent(); |
| 3010 |
} |
3010 |
} |
| 3011 |
}; |
3011 |
}; |
| 3012 |
|
3012 |
|
| 3013 |
/// Mutate an instruction to another type. |
3013 |
/// Mutate an instruction to another type. |
| 3014 |
class TypeMutator : public TypePromotionAction { |
3014 |
class TypeMutator : public TypePromotionAction { |
| 3015 |
/// Record the original type. |
3015 |
/// Record the original type. |
| 3016 |
Type *OrigTy; |
3016 |
Type *OrigTy; |
| 3017 |
|
3017 |
|
| 3018 |
public: |
3018 |
public: |
| 3019 |
/// Mutate the type of \p Inst into \p NewTy. |
3019 |
/// Mutate the type of \p Inst into \p NewTy. |
| 3020 |
TypeMutator(Instruction *Inst, Type *NewTy) |
3020 |
TypeMutator(Instruction *Inst, Type *NewTy) |
| 3021 |
: TypePromotionAction(Inst), OrigTy(Inst->getType()) { |
3021 |
: TypePromotionAction(Inst), OrigTy(Inst->getType()) { |
| 3022 |
LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy |
3022 |
LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy |
| 3023 |
<< "\n"); |
3023 |
<< "\n"); |
| 3024 |
Inst->mutateType(NewTy); |
3024 |
Inst->mutateType(NewTy); |
| 3025 |
} |
3025 |
} |
| 3026 |
|
3026 |
|
| 3027 |
/// Mutate the instruction back to its original type. |
3027 |
/// Mutate the instruction back to its original type. |
| 3028 |
void undo() override { |
3028 |
void undo() override { |
| 3029 |
LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy |
3029 |
LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy |
| 3030 |
<< "\n"); |
3030 |
<< "\n"); |
| 3031 |
Inst->mutateType(OrigTy); |
3031 |
Inst->mutateType(OrigTy); |
| 3032 |
} |
3032 |
} |
| 3033 |
}; |
3033 |
}; |
| 3034 |
|
3034 |
|
| 3035 |
/// Replace the uses of an instruction by another instruction. |
3035 |
/// Replace the uses of an instruction by another instruction. |
| 3036 |
class UsesReplacer : public TypePromotionAction { |
3036 |
class UsesReplacer : public TypePromotionAction { |
| 3037 |
/// Helper structure to keep track of the replaced uses. |
3037 |
/// Helper structure to keep track of the replaced uses. |
| 3038 |
struct InstructionAndIdx { |
3038 |
struct InstructionAndIdx { |
| 3039 |
/// The instruction using the instruction. |
3039 |
/// The instruction using the instruction. |
| 3040 |
Instruction *Inst; |
3040 |
Instruction *Inst; |
| 3041 |
|
3041 |
|
| 3042 |
/// The index where this instruction is used for Inst. |
3042 |
/// The index where this instruction is used for Inst. |
| 3043 |
unsigned Idx; |
3043 |
unsigned Idx; |
| 3044 |
|
3044 |
|
| 3045 |
InstructionAndIdx(Instruction *Inst, unsigned Idx) |
3045 |
InstructionAndIdx(Instruction *Inst, unsigned Idx) |
| 3046 |
: Inst(Inst), Idx(Idx) {} |
3046 |
: Inst(Inst), Idx(Idx) {} |
| 3047 |
}; |
3047 |
}; |
| 3048 |
|
3048 |
|
| 3049 |
/// Keep track of the original uses (pair Instruction, Index). |
3049 |
/// Keep track of the original uses (pair Instruction, Index). |
| 3050 |
SmallVector OriginalUses; |
3050 |
SmallVector OriginalUses; |
| 3051 |
/// Keep track of the debug users. |
3051 |
/// Keep track of the debug users. |
| 3052 |
SmallVector DbgValues; |
3052 |
SmallVector DbgValues; |
| 3053 |
|
3053 |
|
| 3054 |
/// Keep track of the new value so that we can undo it by replacing |
3054 |
/// Keep track of the new value so that we can undo it by replacing |
| 3055 |
/// instances of the new value with the original value. |
3055 |
/// instances of the new value with the original value. |
| 3056 |
Value *New; |
3056 |
Value *New; |
| 3057 |
|
3057 |
|
| 3058 |
using use_iterator = SmallVectorImpl::iterator; |
3058 |
using use_iterator = SmallVectorImpl::iterator; |
| 3059 |
|
3059 |
|
| 3060 |
public: |
3060 |
public: |
| 3061 |
/// Replace all the use of \p Inst by \p New. |
3061 |
/// Replace all the use of \p Inst by \p New. |
| 3062 |
UsesReplacer(Instruction *Inst, Value *New) |
3062 |
UsesReplacer(Instruction *Inst, Value *New) |
| 3063 |
: TypePromotionAction(Inst), New(New) { |
3063 |
: TypePromotionAction(Inst), New(New) { |
| 3064 |
LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New |
3064 |
LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New |
| 3065 |
<< "\n"); |
3065 |
<< "\n"); |
| 3066 |
// Record the original uses. |
3066 |
// Record the original uses. |
| 3067 |
for (Use &U : Inst->uses()) { |
3067 |
for (Use &U : Inst->uses()) { |
| 3068 |
Instruction *UserI = cast(U.getUser()); |
3068 |
Instruction *UserI = cast(U.getUser()); |
| 3069 |
OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); |
3069 |
OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); |
| 3070 |
} |
3070 |
} |
| 3071 |
// Record the debug uses separately. They are not in the instruction's |
3071 |
// Record the debug uses separately. They are not in the instruction's |
| 3072 |
// use list, but they are replaced by RAUW. |
3072 |
// use list, but they are replaced by RAUW. |
| 3073 |
findDbgValues(DbgValues, Inst); |
3073 |
findDbgValues(DbgValues, Inst); |
| 3074 |
|
3074 |
|
| 3075 |
// Now, we can replace the uses. |
3075 |
// Now, we can replace the uses. |
| 3076 |
Inst->replaceAllUsesWith(New); |
3076 |
Inst->replaceAllUsesWith(New); |
| 3077 |
} |
3077 |
} |
| 3078 |
|
3078 |
|
| 3079 |
/// Reassign the original uses of Inst to Inst. |
3079 |
/// Reassign the original uses of Inst to Inst. |
| 3080 |
void undo() override { |
3080 |
void undo() override { |
| 3081 |
LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); |
3081 |
LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); |
| 3082 |
for (InstructionAndIdx &Use : OriginalUses) |
3082 |
for (InstructionAndIdx &Use : OriginalUses) |
| 3083 |
Use.Inst->setOperand(Use.Idx, Inst); |
3083 |
Use.Inst->setOperand(Use.Idx, Inst); |
| 3084 |
// RAUW has replaced all original uses with references to the new value, |
3084 |
// RAUW has replaced all original uses with references to the new value, |
| 3085 |
// including the debug uses. Since we are undoing the replacements, |
3085 |
// including the debug uses. Since we are undoing the replacements, |
| 3086 |
// the original debug uses must also be reinstated to maintain the |
3086 |
// the original debug uses must also be reinstated to maintain the |
| 3087 |
// correctness and utility of debug value instructions. |
3087 |
// correctness and utility of debug value instructions. |
| 3088 |
for (auto *DVI : DbgValues) |
3088 |
for (auto *DVI : DbgValues) |
| 3089 |
DVI->replaceVariableLocationOp(New, Inst); |
3089 |
DVI->replaceVariableLocationOp(New, Inst); |
| 3090 |
} |
3090 |
} |
| 3091 |
}; |
3091 |
}; |
| 3092 |
|
3092 |
|
| 3093 |
/// Remove an instruction from the IR. |
3093 |
/// Remove an instruction from the IR. |
| 3094 |
class InstructionRemover : public TypePromotionAction { |
3094 |
class InstructionRemover : public TypePromotionAction { |
| 3095 |
/// Original position of the instruction. |
3095 |
/// Original position of the instruction. |
| 3096 |
InsertionHandler Inserter; |
3096 |
InsertionHandler Inserter; |
| 3097 |
|
3097 |
|
| 3098 |
/// Helper structure to hide all the link to the instruction. In other |
3098 |
/// Helper structure to hide all the link to the instruction. In other |
| 3099 |
/// words, this helps to do as if the instruction was removed. |
3099 |
/// words, this helps to do as if the instruction was removed. |
| 3100 |
OperandsHider Hider; |
3100 |
OperandsHider Hider; |
| 3101 |
|
3101 |
|
| 3102 |
/// Keep track of the uses replaced, if any. |
3102 |
/// Keep track of the uses replaced, if any. |
| 3103 |
UsesReplacer *Replacer = nullptr; |
3103 |
UsesReplacer *Replacer = nullptr; |
| 3104 |
|
3104 |
|
| 3105 |
/// Keep track of instructions removed. |
3105 |
/// Keep track of instructions removed. |
| 3106 |
SetOfInstrs &RemovedInsts; |
3106 |
SetOfInstrs &RemovedInsts; |
| 3107 |
|
3107 |
|
| 3108 |
public: |
3108 |
public: |
| 3109 |
/// Remove all reference of \p Inst and optionally replace all its |
3109 |
/// Remove all reference of \p Inst and optionally replace all its |
| 3110 |
/// uses with New. |
3110 |
/// uses with New. |
| 3111 |
/// \p RemovedInsts Keep track of the instructions removed by this Action. |
3111 |
/// \p RemovedInsts Keep track of the instructions removed by this Action. |
| 3112 |
/// \pre If !Inst->use_empty(), then New != nullptr |
3112 |
/// \pre If !Inst->use_empty(), then New != nullptr |
| 3113 |
InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts, |
3113 |
InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts, |
| 3114 |
Value *New = nullptr) |
3114 |
Value *New = nullptr) |
| 3115 |
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), |
3115 |
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), |
| 3116 |
RemovedInsts(RemovedInsts) { |
3116 |
RemovedInsts(RemovedInsts) { |
| 3117 |
if (New) |
3117 |
if (New) |
| 3118 |
Replacer = new UsesReplacer(Inst, New); |
3118 |
Replacer = new UsesReplacer(Inst, New); |
| 3119 |
LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); |
3119 |
LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); |
| 3120 |
RemovedInsts.insert(Inst); |
3120 |
RemovedInsts.insert(Inst); |
| 3121 |
/// The instructions removed here will be freed after completing |
3121 |
/// The instructions removed here will be freed after completing |
| 3122 |
/// optimizeBlock() for all blocks as we need to keep track of the |
3122 |
/// optimizeBlock() for all blocks as we need to keep track of the |
| 3123 |
/// removed instructions during promotion. |
3123 |
/// removed instructions during promotion. |
| 3124 |
Inst->removeFromParent(); |
3124 |
Inst->removeFromParent(); |
| 3125 |
} |
3125 |
} |
| 3126 |
|
3126 |
|
| 3127 |
~InstructionRemover() override { delete Replacer; } |
3127 |
~InstructionRemover() override { delete Replacer; } |
| 3128 |
|
3128 |
|
| 3129 |
InstructionRemover &operator=(const InstructionRemover &other) = delete; |
3129 |
InstructionRemover &operator=(const InstructionRemover &other) = delete; |
| 3130 |
InstructionRemover(const InstructionRemover &other) = delete; |
3130 |
InstructionRemover(const InstructionRemover &other) = delete; |
| 3131 |
|
3131 |
|
| 3132 |
/// Resurrect the instruction and reassign it to the proper uses if |
3132 |
/// Resurrect the instruction and reassign it to the proper uses if |
| 3133 |
/// new value was provided when build this action. |
3133 |
/// new value was provided when build this action. |
| 3134 |
void undo() override { |
3134 |
void undo() override { |
| 3135 |
LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); |
3135 |
LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); |
| 3136 |
Inserter.insert(Inst); |
3136 |
Inserter.insert(Inst); |
| 3137 |
if (Replacer) |
3137 |
if (Replacer) |
| 3138 |
Replacer->undo(); |
3138 |
Replacer->undo(); |
| 3139 |
Hider.undo(); |
3139 |
Hider.undo(); |
| 3140 |
RemovedInsts.erase(Inst); |
3140 |
RemovedInsts.erase(Inst); |
| 3141 |
} |
3141 |
} |
| 3142 |
}; |
3142 |
}; |
| 3143 |
|
3143 |
|
| 3144 |
public: |
3144 |
public: |
| 3145 |
/// Restoration point. |
3145 |
/// Restoration point. |
| 3146 |
/// The restoration point is a pointer to an action instead of an iterator |
3146 |
/// The restoration point is a pointer to an action instead of an iterator |
| 3147 |
/// because the iterator may be invalidated but not the pointer. |
3147 |
/// because the iterator may be invalidated but not the pointer. |
| 3148 |
using ConstRestorationPt = const TypePromotionAction *; |
3148 |
using ConstRestorationPt = const TypePromotionAction *; |
| 3149 |
|
3149 |
|
| 3150 |
TypePromotionTransaction(SetOfInstrs &RemovedInsts) |
3150 |
TypePromotionTransaction(SetOfInstrs &RemovedInsts) |
| 3151 |
: RemovedInsts(RemovedInsts) {} |
3151 |
: RemovedInsts(RemovedInsts) {} |
| 3152 |
|
3152 |
|
| 3153 |
/// Advocate every changes made in that transaction. Return true if any change |
3153 |
/// Advocate every changes made in that transaction. Return true if any change |
| 3154 |
/// happen. |
3154 |
/// happen. |
| 3155 |
bool commit(); |
3155 |
bool commit(); |
| 3156 |
|
3156 |
|
| 3157 |
/// Undo all the changes made after the given point. |
3157 |
/// Undo all the changes made after the given point. |
| 3158 |
void rollback(ConstRestorationPt Point); |
3158 |
void rollback(ConstRestorationPt Point); |
| 3159 |
|
3159 |
|
| 3160 |
/// Get the current restoration point. |
3160 |
/// Get the current restoration point. |
| 3161 |
ConstRestorationPt getRestorationPoint() const; |
3161 |
ConstRestorationPt getRestorationPoint() const; |
| 3162 |
|
3162 |
|
| 3163 |
/// \name API for IR modification with state keeping to support rollback. |
3163 |
/// \name API for IR modification with state keeping to support rollback. |
| 3164 |
/// @{ |
3164 |
/// @{ |
| 3165 |
/// Same as Instruction::setOperand. |
3165 |
/// Same as Instruction::setOperand. |
| 3166 |
void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); |
3166 |
void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); |
| 3167 |
|
3167 |
|
| 3168 |
/// Same as Instruction::eraseFromParent. |
3168 |
/// Same as Instruction::eraseFromParent. |
| 3169 |
void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); |
3169 |
void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); |
| 3170 |
|
3170 |
|
| 3171 |
/// Same as Value::replaceAllUsesWith. |
3171 |
/// Same as Value::replaceAllUsesWith. |
| 3172 |
void replaceAllUsesWith(Instruction *Inst, Value *New); |
3172 |
void replaceAllUsesWith(Instruction *Inst, Value *New); |
| 3173 |
|
3173 |
|
| 3174 |
/// Same as Value::mutateType. |
3174 |
/// Same as Value::mutateType. |
| 3175 |
void mutateType(Instruction *Inst, Type *NewTy); |
3175 |
void mutateType(Instruction *Inst, Type *NewTy); |
| 3176 |
|
3176 |
|
| 3177 |
/// Same as IRBuilder::createTrunc. |
3177 |
/// Same as IRBuilder::createTrunc. |
| 3178 |
Value *createTrunc(Instruction *Opnd, Type *Ty); |
3178 |
Value *createTrunc(Instruction *Opnd, Type *Ty); |
| 3179 |
|
3179 |
|
| 3180 |
/// Same as IRBuilder::createSExt. |
3180 |
/// Same as IRBuilder::createSExt. |
| 3181 |
Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); |
3181 |
Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); |
| 3182 |
|
3182 |
|
| 3183 |
/// Same as IRBuilder::createZExt. |
3183 |
/// Same as IRBuilder::createZExt. |
| 3184 |
Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); |
3184 |
Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); |
| 3185 |
|
3185 |
|
| 3186 |
/// Same as Instruction::moveBefore. |
3186 |
/// Same as Instruction::moveBefore. |
| 3187 |
void moveBefore(Instruction *Inst, Instruction *Before); |
3187 |
void moveBefore(Instruction *Inst, Instruction *Before); |
| 3188 |
/// @} |
3188 |
/// @} |
| 3189 |
|
3189 |
|
| 3190 |
private: |
3190 |
private: |
| 3191 |
/// The ordered list of actions made so far. |
3191 |
/// The ordered list of actions made so far. |
| 3192 |
SmallVector, 16> Actions; |
3192 |
SmallVector, 16> Actions; |
| 3193 |
|
3193 |
|
| 3194 |
using CommitPt = |
3194 |
using CommitPt = |
| 3195 |
SmallVectorImpl>::iterator; |
3195 |
SmallVectorImpl>::iterator; |
| 3196 |
|
3196 |
|
| 3197 |
SetOfInstrs &RemovedInsts; |
3197 |
SetOfInstrs &RemovedInsts; |
| 3198 |
}; |
3198 |
}; |
| 3199 |
|
3199 |
|
| 3200 |
} // end anonymous namespace |
3200 |
} // end anonymous namespace |
| 3201 |
|
3201 |
|
| 3202 |
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, |
3202 |
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, |
| 3203 |
Value *NewVal) { |
3203 |
Value *NewVal) { |
| 3204 |
Actions.push_back(std::make_unique( |
3204 |
Actions.push_back(std::make_unique( |
| 3205 |
Inst, Idx, NewVal)); |
3205 |
Inst, Idx, NewVal)); |
| 3206 |
} |
3206 |
} |
| 3207 |
|
3207 |
|
| 3208 |
void TypePromotionTransaction::eraseInstruction(Instruction *Inst, |
3208 |
void TypePromotionTransaction::eraseInstruction(Instruction *Inst, |
| 3209 |
Value *NewVal) { |
3209 |
Value *NewVal) { |
| 3210 |
Actions.push_back( |
3210 |
Actions.push_back( |
| 3211 |
std::make_unique( |
3211 |
std::make_unique( |
| 3212 |
Inst, RemovedInsts, NewVal)); |
3212 |
Inst, RemovedInsts, NewVal)); |
| 3213 |
} |
3213 |
} |
| 3214 |
|
3214 |
|
| 3215 |
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, |
3215 |
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, |
| 3216 |
Value *New) { |
3216 |
Value *New) { |
| 3217 |
Actions.push_back( |
3217 |
Actions.push_back( |
| 3218 |
std::make_unique(Inst, New)); |
3218 |
std::make_unique(Inst, New)); |
| 3219 |
} |
3219 |
} |
| 3220 |
|
3220 |
|
| 3221 |
void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { |
3221 |
void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { |
| 3222 |
Actions.push_back( |
3222 |
Actions.push_back( |
| 3223 |
std::make_unique(Inst, NewTy)); |
3223 |
std::make_unique(Inst, NewTy)); |
| 3224 |
} |
3224 |
} |
| 3225 |
|
3225 |
|
| 3226 |
Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) { |
3226 |
Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) { |
| 3227 |
std::unique_ptr Ptr(new TruncBuilder(Opnd, Ty)); |
3227 |
std::unique_ptr Ptr(new TruncBuilder(Opnd, Ty)); |
| 3228 |
Value *Val = Ptr->getBuiltValue(); |
3228 |
Value *Val = Ptr->getBuiltValue(); |
| 3229 |
Actions.push_back(std::move(Ptr)); |
3229 |
Actions.push_back(std::move(Ptr)); |
| 3230 |
return Val; |
3230 |
return Val; |
| 3231 |
} |
3231 |
} |
| 3232 |
|
3232 |
|
| 3233 |
Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd, |
3233 |
Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd, |
| 3234 |
Type *Ty) { |
3234 |
Type *Ty) { |
| 3235 |
std::unique_ptr Ptr(new SExtBuilder(Inst, Opnd, Ty)); |
3235 |
std::unique_ptr Ptr(new SExtBuilder(Inst, Opnd, Ty)); |
| 3236 |
Value *Val = Ptr->getBuiltValue(); |
3236 |
Value *Val = Ptr->getBuiltValue(); |
| 3237 |
Actions.push_back(std::move(Ptr)); |
3237 |
Actions.push_back(std::move(Ptr)); |
| 3238 |
return Val; |
3238 |
return Val; |
| 3239 |
} |
3239 |
} |
| 3240 |
|
3240 |
|
| 3241 |
Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd, |
3241 |
Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd, |
| 3242 |
Type *Ty) { |
3242 |
Type *Ty) { |
| 3243 |
std::unique_ptr Ptr(new ZExtBuilder(Inst, Opnd, Ty)); |
3243 |
std::unique_ptr Ptr(new ZExtBuilder(Inst, Opnd, Ty)); |
| 3244 |
Value *Val = Ptr->getBuiltValue(); |
3244 |
Value *Val = Ptr->getBuiltValue(); |
| 3245 |
Actions.push_back(std::move(Ptr)); |
3245 |
Actions.push_back(std::move(Ptr)); |
| 3246 |
return Val; |
3246 |
return Val; |
| 3247 |
} |
3247 |
} |
| 3248 |
|
3248 |
|
| 3249 |
void TypePromotionTransaction::moveBefore(Instruction *Inst, |
3249 |
void TypePromotionTransaction::moveBefore(Instruction *Inst, |
| 3250 |
Instruction *Before) { |
3250 |
Instruction *Before) { |
| 3251 |
Actions.push_back( |
3251 |
Actions.push_back( |
| 3252 |
std::make_unique( |
3252 |
std::make_unique( |
| 3253 |
Inst, Before)); |
3253 |
Inst, Before)); |
| 3254 |
} |
3254 |
} |
| 3255 |
|
3255 |
|
| 3256 |
TypePromotionTransaction::ConstRestorationPt |
3256 |
TypePromotionTransaction::ConstRestorationPt |
| 3257 |
TypePromotionTransaction::getRestorationPoint() const { |
3257 |
TypePromotionTransaction::getRestorationPoint() const { |
| 3258 |
return !Actions.empty() ? Actions.back().get() : nullptr; |
3258 |
return !Actions.empty() ? Actions.back().get() : nullptr; |
| 3259 |
} |
3259 |
} |
| 3260 |
|
3260 |
|
| 3261 |
bool TypePromotionTransaction::commit() { |
3261 |
bool TypePromotionTransaction::commit() { |
| 3262 |
for (std::unique_ptr &Action : Actions) |
3262 |
for (std::unique_ptr &Action : Actions) |
| 3263 |
Action->commit(); |
3263 |
Action->commit(); |
| 3264 |
bool Modified = !Actions.empty(); |
3264 |
bool Modified = !Actions.empty(); |
| 3265 |
Actions.clear(); |
3265 |
Actions.clear(); |
| 3266 |
return Modified; |
3266 |
return Modified; |
| 3267 |
} |
3267 |
} |
| 3268 |
|
3268 |
|
| 3269 |
void TypePromotionTransaction::rollback( |
3269 |
void TypePromotionTransaction::rollback( |
| 3270 |
TypePromotionTransaction::ConstRestorationPt Point) { |
3270 |
TypePromotionTransaction::ConstRestorationPt Point) { |
| 3271 |
while (!Actions.empty() && Point != Actions.back().get()) { |
3271 |
while (!Actions.empty() && Point != Actions.back().get()) { |
| 3272 |
std::unique_ptr Curr = Actions.pop_back_val(); |
3272 |
std::unique_ptr Curr = Actions.pop_back_val(); |
| 3273 |
Curr->undo(); |
3273 |
Curr->undo(); |
| 3274 |
} |
3274 |
} |
| 3275 |
} |
3275 |
} |
| 3276 |
|
3276 |
|
| 3277 |
namespace { |
3277 |
namespace { |
| 3278 |
|
3278 |
|
| 3279 |
/// A helper class for matching addressing modes. |
3279 |
/// A helper class for matching addressing modes. |
| 3280 |
/// |
3280 |
/// |
| 3281 |
/// This encapsulates the logic for matching the target-legal addressing modes. |
3281 |
/// This encapsulates the logic for matching the target-legal addressing modes. |
| 3282 |
class AddressingModeMatcher { |
3282 |
class AddressingModeMatcher { |
| 3283 |
SmallVectorImpl &AddrModeInsts; |
3283 |
SmallVectorImpl &AddrModeInsts; |
| 3284 |
const TargetLowering &TLI; |
3284 |
const TargetLowering &TLI; |
| 3285 |
const TargetRegisterInfo &TRI; |
3285 |
const TargetRegisterInfo &TRI; |
| 3286 |
const DataLayout &DL; |
3286 |
const DataLayout &DL; |
| 3287 |
const LoopInfo &LI; |
3287 |
const LoopInfo &LI; |
| 3288 |
const std::function getDTFn; |
3288 |
const std::function getDTFn; |
| 3289 |
|
3289 |
|
| 3290 |
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and |
3290 |
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and |
| 3291 |
/// the memory instruction that we're computing this address for. |
3291 |
/// the memory instruction that we're computing this address for. |
| 3292 |
Type *AccessTy; |
3292 |
Type *AccessTy; |
| 3293 |
unsigned AddrSpace; |
3293 |
unsigned AddrSpace; |
| 3294 |
Instruction *MemoryInst; |
3294 |
Instruction *MemoryInst; |
| 3295 |
|
3295 |
|
| 3296 |
/// This is the addressing mode that we're building up. This is |
3296 |
/// This is the addressing mode that we're building up. This is |
| 3297 |
/// part of the return value of this addressing mode matching stuff. |
3297 |
/// part of the return value of this addressing mode matching stuff. |
| 3298 |
ExtAddrMode &AddrMode; |
3298 |
ExtAddrMode &AddrMode; |
| 3299 |
|
3299 |
|
| 3300 |
/// The instructions inserted by other CodeGenPrepare optimizations. |
3300 |
/// The instructions inserted by other CodeGenPrepare optimizations. |
| 3301 |
const SetOfInstrs &InsertedInsts; |
3301 |
const SetOfInstrs &InsertedInsts; |
| 3302 |
|
3302 |
|
| 3303 |
/// A map from the instructions to their type before promotion. |
3303 |
/// A map from the instructions to their type before promotion. |
| 3304 |
InstrToOrigTy &PromotedInsts; |
3304 |
InstrToOrigTy &PromotedInsts; |
| 3305 |
|
3305 |
|
| 3306 |
/// The ongoing transaction where every action should be registered. |
3306 |
/// The ongoing transaction where every action should be registered. |
| 3307 |
TypePromotionTransaction &TPT; |
3307 |
TypePromotionTransaction &TPT; |
| 3308 |
|
3308 |
|
| 3309 |
// A GEP which has too large offset to be folded into the addressing mode. |
3309 |
// A GEP which has too large offset to be folded into the addressing mode. |
| 3310 |
std::pair, int64_t> &LargeOffsetGEP; |
3310 |
std::pair, int64_t> &LargeOffsetGEP; |
| 3311 |
|
3311 |
|
| 3312 |
/// This is set to true when we should not do profitability checks. |
3312 |
/// This is set to true when we should not do profitability checks. |
| 3313 |
/// When true, IsProfitableToFoldIntoAddressingMode always returns true. |
3313 |
/// When true, IsProfitableToFoldIntoAddressingMode always returns true. |
| 3314 |
bool IgnoreProfitability; |
3314 |
bool IgnoreProfitability; |
| 3315 |
|
3315 |
|
| 3316 |
/// True if we are optimizing for size. |
3316 |
/// True if we are optimizing for size. |
| 3317 |
bool OptSize = false; |
3317 |
bool OptSize = false; |
| 3318 |
|
3318 |
|
| 3319 |
ProfileSummaryInfo *PSI; |
3319 |
ProfileSummaryInfo *PSI; |
| 3320 |
BlockFrequencyInfo *BFI; |
3320 |
BlockFrequencyInfo *BFI; |
| 3321 |
|
3321 |
|
| 3322 |
AddressingModeMatcher( |
3322 |
AddressingModeMatcher( |
| 3323 |
SmallVectorImpl &AMI, const TargetLowering &TLI, |
3323 |
SmallVectorImpl &AMI, const TargetLowering &TLI, |
| 3324 |
const TargetRegisterInfo &TRI, const LoopInfo &LI, |
3324 |
const TargetRegisterInfo &TRI, const LoopInfo &LI, |
| 3325 |
const std::function getDTFn, Type *AT, |
3325 |
const std::function getDTFn, Type *AT, |
| 3326 |
unsigned AS, Instruction *MI, ExtAddrMode &AM, |
3326 |
unsigned AS, Instruction *MI, ExtAddrMode &AM, |
| 3327 |
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, |
3327 |
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, |
| 3328 |
TypePromotionTransaction &TPT, |
3328 |
TypePromotionTransaction &TPT, |
| 3329 |
std::pair, int64_t> &LargeOffsetGEP, |
3329 |
std::pair, int64_t> &LargeOffsetGEP, |
| 3330 |
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) |
3330 |
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) |
| 3331 |
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI), |
3331 |
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI), |
| 3332 |
DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn), |
3332 |
DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn), |
| 3333 |
AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), |
3333 |
AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), |
| 3334 |
InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT), |
3334 |
InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT), |
| 3335 |
LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) { |
3335 |
LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) { |
| 3336 |
IgnoreProfitability = false; |
3336 |
IgnoreProfitability = false; |
| 3337 |
} |
3337 |
} |
| 3338 |
|
3338 |
|
| 3339 |
public: |
3339 |
public: |
| 3340 |
/// Find the maximal addressing mode that a load/store of V can fold, |
3340 |
/// Find the maximal addressing mode that a load/store of V can fold, |
| 3341 |
/// give an access type of AccessTy. This returns a list of involved |
3341 |
/// give an access type of AccessTy. This returns a list of involved |
| 3342 |
/// instructions in AddrModeInsts. |
3342 |
/// instructions in AddrModeInsts. |
| 3343 |
/// \p InsertedInsts The instructions inserted by other CodeGenPrepare |
3343 |
/// \p InsertedInsts The instructions inserted by other CodeGenPrepare |
| 3344 |
/// optimizations. |
3344 |
/// optimizations. |
| 3345 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
3345 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
| 3346 |
/// \p The ongoing transaction where every action should be registered. |
3346 |
/// \p The ongoing transaction where every action should be registered. |
| 3347 |
static ExtAddrMode |
3347 |
static ExtAddrMode |
| 3348 |
Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, |
3348 |
Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, |
| 3349 |
SmallVectorImpl &AddrModeInsts, |
3349 |
SmallVectorImpl &AddrModeInsts, |
| 3350 |
const TargetLowering &TLI, const LoopInfo &LI, |
3350 |
const TargetLowering &TLI, const LoopInfo &LI, |
| 3351 |
const std::function getDTFn, |
3351 |
const std::function getDTFn, |
| 3352 |
const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, |
3352 |
const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, |
| 3353 |
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, |
3353 |
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, |
| 3354 |
std::pair, int64_t> &LargeOffsetGEP, |
3354 |
std::pair, int64_t> &LargeOffsetGEP, |
| 3355 |
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { |
3355 |
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { |
| 3356 |
ExtAddrMode Result; |
3356 |
ExtAddrMode Result; |
| 3357 |
|
3357 |
|
| 3358 |
bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn, |
3358 |
bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn, |
| 3359 |
AccessTy, AS, MemoryInst, Result, |
3359 |
AccessTy, AS, MemoryInst, Result, |
| 3360 |
InsertedInsts, PromotedInsts, TPT, |
3360 |
InsertedInsts, PromotedInsts, TPT, |
| 3361 |
LargeOffsetGEP, OptSize, PSI, BFI) |
3361 |
LargeOffsetGEP, OptSize, PSI, BFI) |
| 3362 |
.matchAddr(V, 0); |
3362 |
.matchAddr(V, 0); |
| 3363 |
(void)Success; |
3363 |
(void)Success; |
| 3364 |
assert(Success && "Couldn't select *anything*?"); |
3364 |
assert(Success && "Couldn't select *anything*?"); |
| 3365 |
return Result; |
3365 |
return Result; |
| 3366 |
} |
3366 |
} |
| 3367 |
|
3367 |
|
| 3368 |
private: |
3368 |
private: |
| 3369 |
bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); |
3369 |
bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); |
| 3370 |
bool matchAddr(Value *Addr, unsigned Depth); |
3370 |
bool matchAddr(Value *Addr, unsigned Depth); |
| 3371 |
bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, |
3371 |
bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, |
| 3372 |
bool *MovedAway = nullptr); |
3372 |
bool *MovedAway = nullptr); |
| 3373 |
bool isProfitableToFoldIntoAddressingMode(Instruction *I, |
3373 |
bool isProfitableToFoldIntoAddressingMode(Instruction *I, |
| 3374 |
ExtAddrMode &AMBefore, |
3374 |
ExtAddrMode &AMBefore, |
| 3375 |
ExtAddrMode &AMAfter); |
3375 |
ExtAddrMode &AMAfter); |
| 3376 |
bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); |
3376 |
bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); |
| 3377 |
bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, |
3377 |
bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, |
| 3378 |
Value *PromotedOperand) const; |
3378 |
Value *PromotedOperand) const; |
| 3379 |
}; |
3379 |
}; |
| 3380 |
|
3380 |
|
| 3381 |
class PhiNodeSet; |
3381 |
class PhiNodeSet; |
| 3382 |
|
3382 |
|
| 3383 |
/// An iterator for PhiNodeSet. |
3383 |
/// An iterator for PhiNodeSet. |
| 3384 |
class PhiNodeSetIterator { |
3384 |
class PhiNodeSetIterator { |
| 3385 |
PhiNodeSet *const Set; |
3385 |
PhiNodeSet *const Set; |
| 3386 |
size_t CurrentIndex = 0; |
3386 |
size_t CurrentIndex = 0; |
| 3387 |
|
3387 |
|
| 3388 |
public: |
3388 |
public: |
| 3389 |
/// The constructor. Start should point to either a valid element, or be equal |
3389 |
/// The constructor. Start should point to either a valid element, or be equal |
| 3390 |
/// to the size of the underlying SmallVector of the PhiNodeSet. |
3390 |
/// to the size of the underlying SmallVector of the PhiNodeSet. |
| 3391 |
PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start); |
3391 |
PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start); |
| 3392 |
PHINode *operator*() const; |
3392 |
PHINode *operator*() const; |
| 3393 |
PhiNodeSetIterator &operator++(); |
3393 |
PhiNodeSetIterator &operator++(); |
| 3394 |
bool operator==(const PhiNodeSetIterator &RHS) const; |
3394 |
bool operator==(const PhiNodeSetIterator &RHS) const; |
| 3395 |
bool operator!=(const PhiNodeSetIterator &RHS) const; |
3395 |
bool operator!=(const PhiNodeSetIterator &RHS) const; |
| 3396 |
}; |
3396 |
}; |
| 3397 |
|
3397 |
|
| 3398 |
/// Keeps a set of PHINodes. |
3398 |
/// Keeps a set of PHINodes. |
| 3399 |
/// |
3399 |
/// |
| 3400 |
/// This is a minimal set implementation for a specific use case: |
3400 |
/// This is a minimal set implementation for a specific use case: |
| 3401 |
/// It is very fast when there are very few elements, but also provides good |
3401 |
/// It is very fast when there are very few elements, but also provides good |
| 3402 |
/// performance when there are many. It is similar to SmallPtrSet, but also |
3402 |
/// performance when there are many. It is similar to SmallPtrSet, but also |
| 3403 |
/// provides iteration by insertion order, which is deterministic and stable |
3403 |
/// provides iteration by insertion order, which is deterministic and stable |
| 3404 |
/// across runs. It is also similar to SmallSetVector, but provides removing |
3404 |
/// across runs. It is also similar to SmallSetVector, but provides removing |
| 3405 |
/// elements in O(1) time. This is achieved by not actually removing the element |
3405 |
/// elements in O(1) time. This is achieved by not actually removing the element |
| 3406 |
/// from the underlying vector, so comes at the cost of using more memory, but |
3406 |
/// from the underlying vector, so comes at the cost of using more memory, but |
| 3407 |
/// that is fine, since PhiNodeSets are used as short lived objects. |
3407 |
/// that is fine, since PhiNodeSets are used as short lived objects. |
| 3408 |
class PhiNodeSet { |
3408 |
class PhiNodeSet { |
| 3409 |
friend class PhiNodeSetIterator; |
3409 |
friend class PhiNodeSetIterator; |
| 3410 |
|
3410 |
|
| 3411 |
using MapType = SmallDenseMap; |
3411 |
using MapType = SmallDenseMap; |
| 3412 |
using iterator = PhiNodeSetIterator; |
3412 |
using iterator = PhiNodeSetIterator; |
| 3413 |
|
3413 |
|
| 3414 |
/// Keeps the elements in the order of their insertion in the underlying |
3414 |
/// Keeps the elements in the order of their insertion in the underlying |
| 3415 |
/// vector. To achieve constant time removal, it never deletes any element. |
3415 |
/// vector. To achieve constant time removal, it never deletes any element. |
| 3416 |
SmallVector NodeList; |
3416 |
SmallVector NodeList; |
| 3417 |
|
3417 |
|
| 3418 |
/// Keeps the elements in the underlying set implementation. This (and not the |
3418 |
/// Keeps the elements in the underlying set implementation. This (and not the |
| 3419 |
/// NodeList defined above) is the source of truth on whether an element |
3419 |
/// NodeList defined above) is the source of truth on whether an element |
| 3420 |
/// is actually in the collection. |
3420 |
/// is actually in the collection. |
| 3421 |
MapType NodeMap; |
3421 |
MapType NodeMap; |
| 3422 |
|
3422 |
|
| 3423 |
/// Points to the first valid (not deleted) element when the set is not empty |
3423 |
/// Points to the first valid (not deleted) element when the set is not empty |
| 3424 |
/// and the value is not zero. Equals to the size of the underlying vector |
3424 |
/// and the value is not zero. Equals to the size of the underlying vector |
| 3425 |
/// when the set is empty. When the value is 0, as in the beginning, the |
3425 |
/// when the set is empty. When the value is 0, as in the beginning, the |
| 3426 |
/// first element may or may not be valid. |
3426 |
/// first element may or may not be valid. |
| 3427 |
size_t FirstValidElement = 0; |
3427 |
size_t FirstValidElement = 0; |
| 3428 |
|
3428 |
|
| 3429 |
public: |
3429 |
public: |
| 3430 |
/// Inserts a new element to the collection. |
3430 |
/// Inserts a new element to the collection. |
| 3431 |
/// \returns true if the element is actually added, i.e. was not in the |
3431 |
/// \returns true if the element is actually added, i.e. was not in the |
| 3432 |
/// collection before the operation. |
3432 |
/// collection before the operation. |
| 3433 |
bool insert(PHINode *Ptr) { |
3433 |
bool insert(PHINode *Ptr) { |
| 3434 |
if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) { |
3434 |
if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) { |
| 3435 |
NodeList.push_back(Ptr); |
3435 |
NodeList.push_back(Ptr); |
| 3436 |
return true; |
3436 |
return true; |
| 3437 |
} |
3437 |
} |
| 3438 |
return false; |
3438 |
return false; |
| 3439 |
} |
3439 |
} |
| 3440 |
|
3440 |
|
| 3441 |
/// Removes the element from the collection. |
3441 |
/// Removes the element from the collection. |
| 3442 |
/// \returns whether the element is actually removed, i.e. was in the |
3442 |
/// \returns whether the element is actually removed, i.e. was in the |
| 3443 |
/// collection before the operation. |
3443 |
/// collection before the operation. |
| 3444 |
bool erase(PHINode *Ptr) { |
3444 |
bool erase(PHINode *Ptr) { |
| 3445 |
if (NodeMap.erase(Ptr)) { |
3445 |
if (NodeMap.erase(Ptr)) { |
| 3446 |
SkipRemovedElements(FirstValidElement); |
3446 |
SkipRemovedElements(FirstValidElement); |
| 3447 |
return true; |
3447 |
return true; |
| 3448 |
} |
3448 |
} |
| 3449 |
return false; |
3449 |
return false; |
| 3450 |
} |
3450 |
} |
| 3451 |
|
3451 |
|
| 3452 |
/// Removes all elements and clears the collection. |
3452 |
/// Removes all elements and clears the collection. |
| 3453 |
void clear() { |
3453 |
void clear() { |
| 3454 |
NodeMap.clear(); |
3454 |
NodeMap.clear(); |
| 3455 |
NodeList.clear(); |
3455 |
NodeList.clear(); |
| 3456 |
FirstValidElement = 0; |
3456 |
FirstValidElement = 0; |
| 3457 |
} |
3457 |
} |
| 3458 |
|
3458 |
|
| 3459 |
/// \returns an iterator that will iterate the elements in the order of |
3459 |
/// \returns an iterator that will iterate the elements in the order of |
| 3460 |
/// insertion. |
3460 |
/// insertion. |
| 3461 |
iterator begin() { |
3461 |
iterator begin() { |
| 3462 |
if (FirstValidElement == 0) |
3462 |
if (FirstValidElement == 0) |
| 3463 |
SkipRemovedElements(FirstValidElement); |
3463 |
SkipRemovedElements(FirstValidElement); |
| 3464 |
return PhiNodeSetIterator(this, FirstValidElement); |
3464 |
return PhiNodeSetIterator(this, FirstValidElement); |
| 3465 |
} |
3465 |
} |
| 3466 |
|
3466 |
|
| 3467 |
/// \returns an iterator that points to the end of the collection. |
3467 |
/// \returns an iterator that points to the end of the collection. |
| 3468 |
iterator end() { return PhiNodeSetIterator(this, NodeList.size()); } |
3468 |
iterator end() { return PhiNodeSetIterator(this, NodeList.size()); } |
| 3469 |
|
3469 |
|
| 3470 |
/// Returns the number of elements in the collection. |
3470 |
/// Returns the number of elements in the collection. |
| 3471 |
size_t size() const { return NodeMap.size(); } |
3471 |
size_t size() const { return NodeMap.size(); } |
| 3472 |
|
3472 |
|
| 3473 |
/// \returns 1 if the given element is in the collection, and 0 if otherwise. |
3473 |
/// \returns 1 if the given element is in the collection, and 0 if otherwise. |
| 3474 |
size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); } |
3474 |
size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); } |
| 3475 |
|
3475 |
|
| 3476 |
private: |
3476 |
private: |
| 3477 |
/// Updates the CurrentIndex so that it will point to a valid element. |
3477 |
/// Updates the CurrentIndex so that it will point to a valid element. |
| 3478 |
/// |
3478 |
/// |
| 3479 |
/// If the element of NodeList at CurrentIndex is valid, it does not |
3479 |
/// If the element of NodeList at CurrentIndex is valid, it does not |
| 3480 |
/// change it. If there are no more valid elements, it updates CurrentIndex |
3480 |
/// change it. If there are no more valid elements, it updates CurrentIndex |
| 3481 |
/// to point to the end of the NodeList. |
3481 |
/// to point to the end of the NodeList. |
| 3482 |
void SkipRemovedElements(size_t &CurrentIndex) { |
3482 |
void SkipRemovedElements(size_t &CurrentIndex) { |
| 3483 |
while (CurrentIndex < NodeList.size()) { |
3483 |
while (CurrentIndex < NodeList.size()) { |
| 3484 |
auto it = NodeMap.find(NodeList[CurrentIndex]); |
3484 |
auto it = NodeMap.find(NodeList[CurrentIndex]); |
| 3485 |
// If the element has been deleted and added again later, NodeMap will |
3485 |
// If the element has been deleted and added again later, NodeMap will |
| 3486 |
// point to a different index, so CurrentIndex will still be invalid. |
3486 |
// point to a different index, so CurrentIndex will still be invalid. |
| 3487 |
if (it != NodeMap.end() && it->second == CurrentIndex) |
3487 |
if (it != NodeMap.end() && it->second == CurrentIndex) |
| 3488 |
break; |
3488 |
break; |
| 3489 |
++CurrentIndex; |
3489 |
++CurrentIndex; |
| 3490 |
} |
3490 |
} |
| 3491 |
} |
3491 |
} |
| 3492 |
}; |
3492 |
}; |
| 3493 |
|
3493 |
|
| 3494 |
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start) |
3494 |
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start) |
| 3495 |
: Set(Set), CurrentIndex(Start) {} |
3495 |
: Set(Set), CurrentIndex(Start) {} |
| 3496 |
|
3496 |
|
| 3497 |
PHINode *PhiNodeSetIterator::operator*() const { |
3497 |
PHINode *PhiNodeSetIterator::operator*() const { |
| 3498 |
assert(CurrentIndex < Set->NodeList.size() && |
3498 |
assert(CurrentIndex < Set->NodeList.size() && |
| 3499 |
"PhiNodeSet access out of range"); |
3499 |
"PhiNodeSet access out of range"); |
| 3500 |
return Set->NodeList[CurrentIndex]; |
3500 |
return Set->NodeList[CurrentIndex]; |
| 3501 |
} |
3501 |
} |
| 3502 |
|
3502 |
|
| 3503 |
PhiNodeSetIterator &PhiNodeSetIterator::operator++() { |
3503 |
PhiNodeSetIterator &PhiNodeSetIterator::operator++() { |
| 3504 |
assert(CurrentIndex < Set->NodeList.size() && |
3504 |
assert(CurrentIndex < Set->NodeList.size() && |
| 3505 |
"PhiNodeSet access out of range"); |
3505 |
"PhiNodeSet access out of range"); |
| 3506 |
++CurrentIndex; |
3506 |
++CurrentIndex; |
| 3507 |
Set->SkipRemovedElements(CurrentIndex); |
3507 |
Set->SkipRemovedElements(CurrentIndex); |
| 3508 |
return *this; |
3508 |
return *this; |
| 3509 |
} |
3509 |
} |
| 3510 |
|
3510 |
|
| 3511 |
bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const { |
3511 |
bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const { |
| 3512 |
return CurrentIndex == RHS.CurrentIndex; |
3512 |
return CurrentIndex == RHS.CurrentIndex; |
| 3513 |
} |
3513 |
} |
| 3514 |
|
3514 |
|
| 3515 |
bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { |
3515 |
bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { |
| 3516 |
return !((*this) == RHS); |
3516 |
return !((*this) == RHS); |
| 3517 |
} |
3517 |
} |
| 3518 |
|
3518 |
|
| 3519 |
/// Keep track of simplification of Phi nodes. |
3519 |
/// Keep track of simplification of Phi nodes. |
| 3520 |
/// Accept the set of all phi nodes and erase phi node from this set |
3520 |
/// Accept the set of all phi nodes and erase phi node from this set |
| 3521 |
/// if it is simplified. |
3521 |
/// if it is simplified. |
| 3522 |
class SimplificationTracker { |
3522 |
class SimplificationTracker { |
| 3523 |
DenseMap Storage; |
3523 |
DenseMap Storage; |
| 3524 |
const SimplifyQuery &SQ; |
3524 |
const SimplifyQuery &SQ; |
| 3525 |
// Tracks newly created Phi nodes. The elements are iterated by insertion |
3525 |
// Tracks newly created Phi nodes. The elements are iterated by insertion |
| 3526 |
// order. |
3526 |
// order. |
| 3527 |
PhiNodeSet AllPhiNodes; |
3527 |
PhiNodeSet AllPhiNodes; |
| 3528 |
// Tracks newly created Select nodes. |
3528 |
// Tracks newly created Select nodes. |
| 3529 |
SmallPtrSet AllSelectNodes; |
3529 |
SmallPtrSet AllSelectNodes; |
| 3530 |
|
3530 |
|
| 3531 |
public: |
3531 |
public: |
| 3532 |
SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} |
3532 |
SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} |
| 3533 |
|
3533 |
|
| 3534 |
Value *Get(Value *V) { |
3534 |
Value *Get(Value *V) { |
| 3535 |
do { |
3535 |
do { |
| 3536 |
auto SV = Storage.find(V); |
3536 |
auto SV = Storage.find(V); |
| 3537 |
if (SV == Storage.end()) |
3537 |
if (SV == Storage.end()) |
| 3538 |
return V; |
3538 |
return V; |
| 3539 |
V = SV->second; |
3539 |
V = SV->second; |
| 3540 |
} while (true); |
3540 |
} while (true); |
| 3541 |
} |
3541 |
} |
| 3542 |
|
3542 |
|
| 3543 |
Value *Simplify(Value *Val) { |
3543 |
Value *Simplify(Value *Val) { |
| 3544 |
SmallVector WorkList; |
3544 |
SmallVector WorkList; |
| 3545 |
SmallPtrSet Visited; |
3545 |
SmallPtrSet Visited; |
| 3546 |
WorkList.push_back(Val); |
3546 |
WorkList.push_back(Val); |
| 3547 |
while (!WorkList.empty()) { |
3547 |
while (!WorkList.empty()) { |
| 3548 |
auto *P = WorkList.pop_back_val(); |
3548 |
auto *P = WorkList.pop_back_val(); |
| 3549 |
if (!Visited.insert(P).second) |
3549 |
if (!Visited.insert(P).second) |
| 3550 |
continue; |
3550 |
continue; |
| 3551 |
if (auto *PI = dyn_cast(P)) |
3551 |
if (auto *PI = dyn_cast(P)) |
| 3552 |
if (Value *V = simplifyInstruction(cast(PI), SQ)) { |
3552 |
if (Value *V = simplifyInstruction(cast(PI), SQ)) { |
| 3553 |
for (auto *U : PI->users()) |
3553 |
for (auto *U : PI->users()) |
| 3554 |
WorkList.push_back(cast(U)); |
3554 |
WorkList.push_back(cast(U)); |
| 3555 |
Put(PI, V); |
3555 |
Put(PI, V); |
| 3556 |
PI->replaceAllUsesWith(V); |
3556 |
PI->replaceAllUsesWith(V); |
| 3557 |
if (auto *PHI = dyn_cast(PI)) |
3557 |
if (auto *PHI = dyn_cast(PI)) |
| 3558 |
AllPhiNodes.erase(PHI); |
3558 |
AllPhiNodes.erase(PHI); |
| 3559 |
if (auto *Select = dyn_cast(PI)) |
3559 |
if (auto *Select = dyn_cast(PI)) |
| 3560 |
AllSelectNodes.erase(Select); |
3560 |
AllSelectNodes.erase(Select); |
| 3561 |
PI->eraseFromParent(); |
3561 |
PI->eraseFromParent(); |
| 3562 |
} |
3562 |
} |
| 3563 |
} |
3563 |
} |
| 3564 |
return Get(Val); |
3564 |
return Get(Val); |
| 3565 |
} |
3565 |
} |
| 3566 |
|
3566 |
|
| 3567 |
void Put(Value *From, Value *To) { Storage.insert({From, To}); } |
3567 |
void Put(Value *From, Value *To) { Storage.insert({From, To}); } |
| 3568 |
|
3568 |
|
| 3569 |
void ReplacePhi(PHINode *From, PHINode *To) { |
3569 |
void ReplacePhi(PHINode *From, PHINode *To) { |
| 3570 |
Value *OldReplacement = Get(From); |
3570 |
Value *OldReplacement = Get(From); |
| 3571 |
while (OldReplacement != From) { |
3571 |
while (OldReplacement != From) { |
| 3572 |
From = To; |
3572 |
From = To; |
| 3573 |
To = dyn_cast(OldReplacement); |
3573 |
To = dyn_cast(OldReplacement); |
| 3574 |
OldReplacement = Get(From); |
3574 |
OldReplacement = Get(From); |
| 3575 |
} |
3575 |
} |
| 3576 |
assert(To && Get(To) == To && "Replacement PHI node is already replaced."); |
3576 |
assert(To && Get(To) == To && "Replacement PHI node is already replaced."); |
| 3577 |
Put(From, To); |
3577 |
Put(From, To); |
| 3578 |
From->replaceAllUsesWith(To); |
3578 |
From->replaceAllUsesWith(To); |
| 3579 |
AllPhiNodes.erase(From); |
3579 |
AllPhiNodes.erase(From); |
| 3580 |
From->eraseFromParent(); |
3580 |
From->eraseFromParent(); |
| 3581 |
} |
3581 |
} |
| 3582 |
|
3582 |
|
| 3583 |
PhiNodeSet &newPhiNodes() { return AllPhiNodes; } |
3583 |
PhiNodeSet &newPhiNodes() { return AllPhiNodes; } |
| 3584 |
|
3584 |
|
| 3585 |
void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } |
3585 |
void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } |
| 3586 |
|
3586 |
|
| 3587 |
void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); } |
3587 |
void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); } |
| 3588 |
|
3588 |
|
| 3589 |
unsigned countNewPhiNodes() const { return AllPhiNodes.size(); } |
3589 |
unsigned countNewPhiNodes() const { return AllPhiNodes.size(); } |
| 3590 |
|
3590 |
|
| 3591 |
unsigned countNewSelectNodes() const { return AllSelectNodes.size(); } |
3591 |
unsigned countNewSelectNodes() const { return AllSelectNodes.size(); } |
| 3592 |
|
3592 |
|
| 3593 |
void destroyNewNodes(Type *CommonType) { |
3593 |
void destroyNewNodes(Type *CommonType) { |
| 3594 |
// For safe erasing, replace the uses with dummy value first. |
3594 |
// For safe erasing, replace the uses with dummy value first. |
| 3595 |
auto *Dummy = PoisonValue::get(CommonType); |
3595 |
auto *Dummy = PoisonValue::get(CommonType); |
| 3596 |
for (auto *I : AllPhiNodes) { |
3596 |
for (auto *I : AllPhiNodes) { |
| 3597 |
I->replaceAllUsesWith(Dummy); |
3597 |
I->replaceAllUsesWith(Dummy); |
| 3598 |
I->eraseFromParent(); |
3598 |
I->eraseFromParent(); |
| 3599 |
} |
3599 |
} |
| 3600 |
AllPhiNodes.clear(); |
3600 |
AllPhiNodes.clear(); |
| 3601 |
for (auto *I : AllSelectNodes) { |
3601 |
for (auto *I : AllSelectNodes) { |
| 3602 |
I->replaceAllUsesWith(Dummy); |
3602 |
I->replaceAllUsesWith(Dummy); |
| 3603 |
I->eraseFromParent(); |
3603 |
I->eraseFromParent(); |
| 3604 |
} |
3604 |
} |
| 3605 |
AllSelectNodes.clear(); |
3605 |
AllSelectNodes.clear(); |
| 3606 |
} |
3606 |
} |
| 3607 |
}; |
3607 |
}; |
| 3608 |
|
3608 |
|
| 3609 |
/// A helper class for combining addressing modes. |
3609 |
/// A helper class for combining addressing modes. |
| 3610 |
class AddressingModeCombiner { |
3610 |
class AddressingModeCombiner { |
| 3611 |
typedef DenseMap FoldAddrToValueMapping; |
3611 |
typedef DenseMap FoldAddrToValueMapping; |
| 3612 |
typedef std::pair PHIPair; |
3612 |
typedef std::pair PHIPair; |
| 3613 |
|
3613 |
|
| 3614 |
private: |
3614 |
private: |
| 3615 |
/// The addressing modes we've collected. |
3615 |
/// The addressing modes we've collected. |
| 3616 |
SmallVector AddrModes; |
3616 |
SmallVector AddrModes; |
| 3617 |
|
3617 |
|
| 3618 |
/// The field in which the AddrModes differ, when we have more than one. |
3618 |
/// The field in which the AddrModes differ, when we have more than one. |
| 3619 |
ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField; |
3619 |
ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField; |
| 3620 |
|
3620 |
|
| 3621 |
/// Are the AddrModes that we have all just equal to their original values? |
3621 |
/// Are the AddrModes that we have all just equal to their original values? |
| 3622 |
bool AllAddrModesTrivial = true; |
3622 |
bool AllAddrModesTrivial = true; |
| 3623 |
|
3623 |
|
| 3624 |
/// Common Type for all different fields in addressing modes. |
3624 |
/// Common Type for all different fields in addressing modes. |
| 3625 |
Type *CommonType = nullptr; |
3625 |
Type *CommonType = nullptr; |
| 3626 |
|
3626 |
|
| 3627 |
/// SimplifyQuery for simplifyInstruction utility. |
3627 |
/// SimplifyQuery for simplifyInstruction utility. |
| 3628 |
const SimplifyQuery &SQ; |
3628 |
const SimplifyQuery &SQ; |
| 3629 |
|
3629 |
|
| 3630 |
/// Original Address. |
3630 |
/// Original Address. |
| 3631 |
Value *Original; |
3631 |
Value *Original; |
| 3632 |
|
3632 |
|
| 3633 |
/// Common value among addresses |
3633 |
/// Common value among addresses |
| 3634 |
Value *CommonValue = nullptr; |
3634 |
Value *CommonValue = nullptr; |
| 3635 |
|
3635 |
|
| 3636 |
public: |
3636 |
public: |
| 3637 |
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) |
3637 |
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) |
| 3638 |
: SQ(_SQ), Original(OriginalValue) {} |
3638 |
: SQ(_SQ), Original(OriginalValue) {} |
| 3639 |
|
3639 |
|
| 3640 |
~AddressingModeCombiner() { eraseCommonValueIfDead(); } |
3640 |
~AddressingModeCombiner() { eraseCommonValueIfDead(); } |
| 3641 |
|
3641 |
|
| 3642 |
/// Get the combined AddrMode |
3642 |
/// Get the combined AddrMode |
| 3643 |
const ExtAddrMode &getAddrMode() const { return AddrModes[0]; } |
3643 |
const ExtAddrMode &getAddrMode() const { return AddrModes[0]; } |
| 3644 |
|
3644 |
|
| 3645 |
/// Add a new AddrMode if it's compatible with the AddrModes we already |
3645 |
/// Add a new AddrMode if it's compatible with the AddrModes we already |
| 3646 |
/// have. |
3646 |
/// have. |
| 3647 |
/// \return True iff we succeeded in doing so. |
3647 |
/// \return True iff we succeeded in doing so. |
| 3648 |
bool addNewAddrMode(ExtAddrMode &NewAddrMode) { |
3648 |
bool addNewAddrMode(ExtAddrMode &NewAddrMode) { |
| 3649 |
// Take note of if we have any non-trivial AddrModes, as we need to detect |
3649 |
// Take note of if we have any non-trivial AddrModes, as we need to detect |
| 3650 |
// when all AddrModes are trivial as then we would introduce a phi or select |
3650 |
// when all AddrModes are trivial as then we would introduce a phi or select |
| 3651 |
// which just duplicates what's already there. |
3651 |
// which just duplicates what's already there. |
| 3652 |
AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial(); |
3652 |
AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial(); |
| 3653 |
|
3653 |
|
| 3654 |
// If this is the first addrmode then everything is fine. |
3654 |
// If this is the first addrmode then everything is fine. |
| 3655 |
if (AddrModes.empty()) { |
3655 |
if (AddrModes.empty()) { |
| 3656 |
AddrModes.emplace_back(NewAddrMode); |
3656 |
AddrModes.emplace_back(NewAddrMode); |
| 3657 |
return true; |
3657 |
return true; |
| 3658 |
} |
3658 |
} |
| 3659 |
|
3659 |
|
| 3660 |
// Figure out how different this is from the other address modes, which we |
3660 |
// Figure out how different this is from the other address modes, which we |
| 3661 |
// can do just by comparing against the first one given that we only care |
3661 |
// can do just by comparing against the first one given that we only care |
| 3662 |
// about the cumulative difference. |
3662 |
// about the cumulative difference. |
| 3663 |
ExtAddrMode::FieldName ThisDifferentField = |
3663 |
ExtAddrMode::FieldName ThisDifferentField = |
| 3664 |
AddrModes[0].compare(NewAddrMode); |
3664 |
AddrModes[0].compare(NewAddrMode); |
| 3665 |
if (DifferentField == ExtAddrMode::NoField) |
3665 |
if (DifferentField == ExtAddrMode::NoField) |
| 3666 |
DifferentField = ThisDifferentField; |
3666 |
DifferentField = ThisDifferentField; |
| 3667 |
else if (DifferentField != ThisDifferentField) |
3667 |
else if (DifferentField != ThisDifferentField) |
| 3668 |
DifferentField = ExtAddrMode::MultipleFields; |
3668 |
DifferentField = ExtAddrMode::MultipleFields; |
| 3669 |
|
3669 |
|
| 3670 |
// If NewAddrMode differs in more than one dimension we cannot handle it. |
3670 |
// If NewAddrMode differs in more than one dimension we cannot handle it. |
| 3671 |
bool CanHandle = DifferentField != ExtAddrMode::MultipleFields; |
3671 |
bool CanHandle = DifferentField != ExtAddrMode::MultipleFields; |
| 3672 |
|
3672 |
|
| 3673 |
// If Scale Field is different then we reject. |
3673 |
// If Scale Field is different then we reject. |
| 3674 |
CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField; |
3674 |
CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField; |
| 3675 |
|
3675 |
|
| 3676 |
// We also must reject the case when base offset is different and |
3676 |
// We also must reject the case when base offset is different and |
| 3677 |
// scale reg is not null, we cannot handle this case due to merge of |
3677 |
// scale reg is not null, we cannot handle this case due to merge of |
| 3678 |
// different offsets will be used as ScaleReg. |
3678 |
// different offsets will be used as ScaleReg. |
| 3679 |
CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField || |
3679 |
CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField || |
| 3680 |
!NewAddrMode.ScaledReg); |
3680 |
!NewAddrMode.ScaledReg); |
| 3681 |
|
3681 |
|
| 3682 |
// We also must reject the case when GV is different and BaseReg installed |
3682 |
// We also must reject the case when GV is different and BaseReg installed |
| 3683 |
// due to we want to use base reg as a merge of GV values. |
3683 |
// due to we want to use base reg as a merge of GV values. |
| 3684 |
CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField || |
3684 |
CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField || |
| 3685 |
!NewAddrMode.HasBaseReg); |
3685 |
!NewAddrMode.HasBaseReg); |
| 3686 |
|
3686 |
|
| 3687 |
// Even if NewAddMode is the same we still need to collect it due to |
3687 |
// Even if NewAddMode is the same we still need to collect it due to |
| 3688 |
// original value is different. And later we will need all original values |
3688 |
// original value is different. And later we will need all original values |
| 3689 |
// as anchors during finding the common Phi node. |
3689 |
// as anchors during finding the common Phi node. |
| 3690 |
if (CanHandle) |
3690 |
if (CanHandle) |
| 3691 |
AddrModes.emplace_back(NewAddrMode); |
3691 |
AddrModes.emplace_back(NewAddrMode); |
| 3692 |
else |
3692 |
else |
| 3693 |
AddrModes.clear(); |
3693 |
AddrModes.clear(); |
| 3694 |
|
3694 |
|
| 3695 |
return CanHandle; |
3695 |
return CanHandle; |
| 3696 |
} |
3696 |
} |
| 3697 |
|
3697 |
|
| 3698 |
/// Combine the addressing modes we've collected into a single |
3698 |
/// Combine the addressing modes we've collected into a single |
| 3699 |
/// addressing mode. |
3699 |
/// addressing mode. |
| 3700 |
/// \return True iff we successfully combined them or we only had one so |
3700 |
/// \return True iff we successfully combined them or we only had one so |
| 3701 |
/// didn't need to combine them anyway. |
3701 |
/// didn't need to combine them anyway. |
| 3702 |
bool combineAddrModes() { |
3702 |
bool combineAddrModes() { |
| 3703 |
// If we have no AddrModes then they can't be combined. |
3703 |
// If we have no AddrModes then they can't be combined. |
| 3704 |
if (AddrModes.size() == 0) |
3704 |
if (AddrModes.size() == 0) |
| 3705 |
return false; |
3705 |
return false; |
| 3706 |
|
3706 |
|
| 3707 |
// A single AddrMode can trivially be combined. |
3707 |
// A single AddrMode can trivially be combined. |
| 3708 |
if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField) |
3708 |
if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField) |
| 3709 |
return true; |
3709 |
return true; |
| 3710 |
|
3710 |
|
| 3711 |
// If the AddrModes we collected are all just equal to the value they are |
3711 |
// If the AddrModes we collected are all just equal to the value they are |
| 3712 |
// derived from then combining them wouldn't do anything useful. |
3712 |
// derived from then combining them wouldn't do anything useful. |
| 3713 |
if (AllAddrModesTrivial) |
3713 |
if (AllAddrModesTrivial) |
| 3714 |
return false; |
3714 |
return false; |
| 3715 |
|
3715 |
|
| 3716 |
if (!addrModeCombiningAllowed()) |
3716 |
if (!addrModeCombiningAllowed()) |
| 3717 |
return false; |
3717 |
return false; |
| 3718 |
|
3718 |
|
| 3719 |
// Build a map between to |
3719 |
// Build a map between to |
| 3720 |
// value of base register. |
3720 |
// value of base register. |
| 3721 |
// Bail out if there is no common type. |
3721 |
// Bail out if there is no common type. |
| 3722 |
FoldAddrToValueMapping Map; |
3722 |
FoldAddrToValueMapping Map; |
| 3723 |
if (!initializeMap(Map)) |
3723 |
if (!initializeMap(Map)) |
| 3724 |
return false; |
3724 |
return false; |
| 3725 |
|
3725 |
|
| 3726 |
CommonValue = findCommon(Map); |
3726 |
CommonValue = findCommon(Map); |
| 3727 |
if (CommonValue) |
3727 |
if (CommonValue) |
| 3728 |
AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes); |
3728 |
AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes); |
| 3729 |
return CommonValue != nullptr; |
3729 |
return CommonValue != nullptr; |
| 3730 |
} |
3730 |
} |
| 3731 |
|
3731 |
|
| 3732 |
private: |
3732 |
private: |
| 3733 |
/// `CommonValue` may be a placeholder inserted by us. |
3733 |
/// `CommonValue` may be a placeholder inserted by us. |
| 3734 |
/// If the placeholder is not used, we should remove this dead instruction. |
3734 |
/// If the placeholder is not used, we should remove this dead instruction. |
| 3735 |
void eraseCommonValueIfDead() { |
3735 |
void eraseCommonValueIfDead() { |
| 3736 |
if (CommonValue && CommonValue->getNumUses() == 0) |
3736 |
if (CommonValue && CommonValue->getNumUses() == 0) |
| 3737 |
if (Instruction *CommonInst = dyn_cast(CommonValue)) |
3737 |
if (Instruction *CommonInst = dyn_cast(CommonValue)) |
| 3738 |
CommonInst->eraseFromParent(); |
3738 |
CommonInst->eraseFromParent(); |
| 3739 |
} |
3739 |
} |
| 3740 |
|
3740 |
|
| 3741 |
/// Initialize Map with anchor values. For address seen |
3741 |
/// Initialize Map with anchor values. For address seen |
| 3742 |
/// we set the value of different field saw in this address. |
3742 |
/// we set the value of different field saw in this address. |
| 3743 |
/// At the same time we find a common type for different field we will |
3743 |
/// At the same time we find a common type for different field we will |
| 3744 |
/// use to create new Phi/Select nodes. Keep it in CommonType field. |
3744 |
/// use to create new Phi/Select nodes. Keep it in CommonType field. |
| 3745 |
/// Return false if there is no common type found. |
3745 |
/// Return false if there is no common type found. |
| 3746 |
bool initializeMap(FoldAddrToValueMapping &Map) { |
3746 |
bool initializeMap(FoldAddrToValueMapping &Map) { |
| 3747 |
// Keep track of keys where the value is null. We will need to replace it |
3747 |
// Keep track of keys where the value is null. We will need to replace it |
| 3748 |
// with constant null when we know the common type. |
3748 |
// with constant null when we know the common type. |
| 3749 |
SmallVector NullValue; |
3749 |
SmallVector NullValue; |
| 3750 |
Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); |
3750 |
Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); |
| 3751 |
for (auto &AM : AddrModes) { |
3751 |
for (auto &AM : AddrModes) { |
| 3752 |
Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); |
3752 |
Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); |
| 3753 |
if (DV) { |
3753 |
if (DV) { |
| 3754 |
auto *Type = DV->getType(); |
3754 |
auto *Type = DV->getType(); |
| 3755 |
if (CommonType && CommonType != Type) |
3755 |
if (CommonType && CommonType != Type) |
| 3756 |
return false; |
3756 |
return false; |
| 3757 |
CommonType = Type; |
3757 |
CommonType = Type; |
| 3758 |
Map[AM.OriginalValue] = DV; |
3758 |
Map[AM.OriginalValue] = DV; |
| 3759 |
} else { |
3759 |
} else { |
| 3760 |
NullValue.push_back(AM.OriginalValue); |
3760 |
NullValue.push_back(AM.OriginalValue); |
| 3761 |
} |
3761 |
} |
| 3762 |
} |
3762 |
} |
| 3763 |
assert(CommonType && "At least one non-null value must be!"); |
3763 |
assert(CommonType && "At least one non-null value must be!"); |
| 3764 |
for (auto *V : NullValue) |
3764 |
for (auto *V : NullValue) |
| 3765 |
Map[V] = Constant::getNullValue(CommonType); |
3765 |
Map[V] = Constant::getNullValue(CommonType); |
| 3766 |
return true; |
3766 |
return true; |
| 3767 |
} |
3767 |
} |
| 3768 |
|
3768 |
|
| 3769 |
/// We have mapping between value A and other value B where B was a field in |
3769 |
/// We have mapping between value A and other value B where B was a field in |
| 3770 |
/// addressing mode represented by A. Also we have an original value C |
3770 |
/// addressing mode represented by A. Also we have an original value C |
| 3771 |
/// representing an address we start with. Traversing from C through phi and |
3771 |
/// representing an address we start with. Traversing from C through phi and |
| 3772 |
/// selects we ended up with A's in a map. This utility function tries to find |
3772 |
/// selects we ended up with A's in a map. This utility function tries to find |
| 3773 |
/// a value V which is a field in addressing mode C and traversing through phi |
3773 |
/// a value V which is a field in addressing mode C and traversing through phi |
| 3774 |
/// nodes and selects we will end up in corresponded values B in a map. |
3774 |
/// nodes and selects we will end up in corresponded values B in a map. |
| 3775 |
/// The utility will create a new Phi/Selects if needed. |
3775 |
/// The utility will create a new Phi/Selects if needed. |
| 3776 |
// The simple example looks as follows: |
3776 |
// The simple example looks as follows: |
| 3777 |
// BB1: |
3777 |
// BB1: |
| 3778 |
// p1 = b1 + 40 |
3778 |
// p1 = b1 + 40 |
| 3779 |
// br cond BB2, BB3 |
3779 |
// br cond BB2, BB3 |
| 3780 |
// BB2: |
3780 |
// BB2: |
| 3781 |
// p2 = b2 + 40 |
3781 |
// p2 = b2 + 40 |
| 3782 |
// br BB3 |
3782 |
// br BB3 |
| 3783 |
// BB3: |
3783 |
// BB3: |
| 3784 |
// p = phi [p1, BB1], [p2, BB2] |
3784 |
// p = phi [p1, BB1], [p2, BB2] |
| 3785 |
// v = load p |
3785 |
// v = load p |
| 3786 |
// Map is |
3786 |
// Map is |
| 3787 |
// p1 -> b1 |
3787 |
// p1 -> b1 |
| 3788 |
// p2 -> b2 |
3788 |
// p2 -> b2 |
| 3789 |
// Request is |
3789 |
// Request is |
| 3790 |
// p -> ? |
3790 |
// p -> ? |
| 3791 |
// The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3. |
3791 |
// The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3. |
| 3792 |
Value *findCommon(FoldAddrToValueMapping &Map) { |
3792 |
Value *findCommon(FoldAddrToValueMapping &Map) { |
| 3793 |
// Tracks the simplification of newly created phi nodes. The reason we use |
3793 |
// Tracks the simplification of newly created phi nodes. The reason we use |
| 3794 |
// this mapping is because we will add new created Phi nodes in AddrToBase. |
3794 |
// this mapping is because we will add new created Phi nodes in AddrToBase. |
| 3795 |
// Simplification of Phi nodes is recursive, so some Phi node may |
3795 |
// Simplification of Phi nodes is recursive, so some Phi node may |
| 3796 |
// be simplified after we added it to AddrToBase. In reality this |
3796 |
// be simplified after we added it to AddrToBase. In reality this |
| 3797 |
// simplification is possible only if original phi/selects were not |
3797 |
// simplification is possible only if original phi/selects were not |
| 3798 |
// simplified yet. |
3798 |
// simplified yet. |
| 3799 |
// Using this mapping we can find the current value in AddrToBase. |
3799 |
// Using this mapping we can find the current value in AddrToBase. |
| 3800 |
SimplificationTracker ST(SQ); |
3800 |
SimplificationTracker ST(SQ); |
| 3801 |
|
3801 |
|
| 3802 |
// First step, DFS to create PHI nodes for all intermediate blocks. |
3802 |
// First step, DFS to create PHI nodes for all intermediate blocks. |
| 3803 |
// Also fill traverse order for the second step. |
3803 |
// Also fill traverse order for the second step. |
| 3804 |
SmallVector TraverseOrder; |
3804 |
SmallVector TraverseOrder; |
| 3805 |
InsertPlaceholders(Map, TraverseOrder, ST); |
3805 |
InsertPlaceholders(Map, TraverseOrder, ST); |
| 3806 |
|
3806 |
|
| 3807 |
// Second Step, fill new nodes by merged values and simplify if possible. |
3807 |
// Second Step, fill new nodes by merged values and simplify if possible. |
| 3808 |
FillPlaceholders(Map, TraverseOrder, ST); |
3808 |
FillPlaceholders(Map, TraverseOrder, ST); |
| 3809 |
|
3809 |
|
| 3810 |
if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) { |
3810 |
if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) { |
| 3811 |
ST.destroyNewNodes(CommonType); |
3811 |
ST.destroyNewNodes(CommonType); |
| 3812 |
return nullptr; |
3812 |
return nullptr; |
| 3813 |
} |
3813 |
} |
| 3814 |
|
3814 |
|
| 3815 |
// Now we'd like to match New Phi nodes to existed ones. |
3815 |
// Now we'd like to match New Phi nodes to existed ones. |
| 3816 |
unsigned PhiNotMatchedCount = 0; |
3816 |
unsigned PhiNotMatchedCount = 0; |
| 3817 |
if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) { |
3817 |
if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) { |
| 3818 |
ST.destroyNewNodes(CommonType); |
3818 |
ST.destroyNewNodes(CommonType); |
| 3819 |
return nullptr; |
3819 |
return nullptr; |
| 3820 |
} |
3820 |
} |
| 3821 |
|
3821 |
|
| 3822 |
auto *Result = ST.Get(Map.find(Original)->second); |
3822 |
auto *Result = ST.Get(Map.find(Original)->second); |
| 3823 |
if (Result) { |
3823 |
if (Result) { |
| 3824 |
NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount; |
3824 |
NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount; |
| 3825 |
NumMemoryInstsSelectCreated += ST.countNewSelectNodes(); |
3825 |
NumMemoryInstsSelectCreated += ST.countNewSelectNodes(); |
| 3826 |
} |
3826 |
} |
| 3827 |
return Result; |
3827 |
return Result; |
| 3828 |
} |
3828 |
} |
| 3829 |
|
3829 |
|
| 3830 |
/// Try to match PHI node to Candidate. |
3830 |
/// Try to match PHI node to Candidate. |
| 3831 |
/// Matcher tracks the matched Phi nodes. |
3831 |
/// Matcher tracks the matched Phi nodes. |
| 3832 |
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, |
3832 |
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, |
| 3833 |
SmallSetVector &Matcher, |
3833 |
SmallSetVector &Matcher, |
| 3834 |
PhiNodeSet &PhiNodesToMatch) { |
3834 |
PhiNodeSet &PhiNodesToMatch) { |
| 3835 |
SmallVector WorkList; |
3835 |
SmallVector WorkList; |
| 3836 |
Matcher.insert({PHI, Candidate}); |
3836 |
Matcher.insert({PHI, Candidate}); |
| 3837 |
SmallSet MatchedPHIs; |
3837 |
SmallSet MatchedPHIs; |
| 3838 |
MatchedPHIs.insert(PHI); |
3838 |
MatchedPHIs.insert(PHI); |
| 3839 |
WorkList.push_back({PHI, Candidate}); |
3839 |
WorkList.push_back({PHI, Candidate}); |
| 3840 |
SmallSet Visited; |
3840 |
SmallSet Visited; |
| 3841 |
while (!WorkList.empty()) { |
3841 |
while (!WorkList.empty()) { |
| 3842 |
auto Item = WorkList.pop_back_val(); |
3842 |
auto Item = WorkList.pop_back_val(); |
| 3843 |
if (!Visited.insert(Item).second) |
3843 |
if (!Visited.insert(Item).second) |
| 3844 |
continue; |
3844 |
continue; |
| 3845 |
// We iterate over all incoming values to Phi to compare them. |
3845 |
// We iterate over all incoming values to Phi to compare them. |
| 3846 |
// If values are different and both of them Phi and the first one is a |
3846 |
// If values are different and both of them Phi and the first one is a |
| 3847 |
// Phi we added (subject to match) and both of them is in the same basic |
3847 |
// Phi we added (subject to match) and both of them is in the same basic |
| 3848 |
// block then we can match our pair if values match. So we state that |
3848 |
// block then we can match our pair if values match. So we state that |
| 3849 |
// these values match and add it to work list to verify that. |
3849 |
// these values match and add it to work list to verify that. |
| 3850 |
for (auto *B : Item.first->blocks()) { |
3850 |
for (auto *B : Item.first->blocks()) { |
| 3851 |
Value *FirstValue = Item.first->getIncomingValueForBlock(B); |
3851 |
Value *FirstValue = Item.first->getIncomingValueForBlock(B); |
| 3852 |
Value *SecondValue = Item.second->getIncomingValueForBlock(B); |
3852 |
Value *SecondValue = Item.second->getIncomingValueForBlock(B); |
| 3853 |
if (FirstValue == SecondValue) |
3853 |
if (FirstValue == SecondValue) |
| 3854 |
continue; |
3854 |
continue; |
| 3855 |
|
3855 |
|
| 3856 |
PHINode *FirstPhi = dyn_cast(FirstValue); |
3856 |
PHINode *FirstPhi = dyn_cast(FirstValue); |
| 3857 |
PHINode *SecondPhi = dyn_cast(SecondValue); |
3857 |
PHINode *SecondPhi = dyn_cast(SecondValue); |
| 3858 |
|
3858 |
|
| 3859 |
// One of them is not Phi or |
3859 |
// One of them is not Phi or |
| 3860 |
// The first one is not Phi node from the set we'd like to match or |
3860 |
// The first one is not Phi node from the set we'd like to match or |
| 3861 |
// Phi nodes from different basic blocks then |
3861 |
// Phi nodes from different basic blocks then |
| 3862 |
// we will not be able to match. |
3862 |
// we will not be able to match. |
| 3863 |
if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || |
3863 |
if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || |
| 3864 |
FirstPhi->getParent() != SecondPhi->getParent()) |
3864 |
FirstPhi->getParent() != SecondPhi->getParent()) |
| 3865 |
return false; |
3865 |
return false; |
| 3866 |
|
3866 |
|
| 3867 |
// If we already matched them then continue. |
3867 |
// If we already matched them then continue. |
| 3868 |
if (Matcher.count({FirstPhi, SecondPhi})) |
3868 |
if (Matcher.count({FirstPhi, SecondPhi})) |
| 3869 |
continue; |
3869 |
continue; |
| 3870 |
// So the values are different and does not match. So we need them to |
3870 |
// So the values are different and does not match. So we need them to |
| 3871 |
// match. (But we register no more than one match per PHI node, so that |
3871 |
// match. (But we register no more than one match per PHI node, so that |
| 3872 |
// we won't later try to replace them twice.) |
3872 |
// we won't later try to replace them twice.) |
| 3873 |
if (MatchedPHIs.insert(FirstPhi).second) |
3873 |
if (MatchedPHIs.insert(FirstPhi).second) |
| 3874 |
Matcher.insert({FirstPhi, SecondPhi}); |
3874 |
Matcher.insert({FirstPhi, SecondPhi}); |
| 3875 |
// But me must check it. |
3875 |
// But me must check it. |
| 3876 |
WorkList.push_back({FirstPhi, SecondPhi}); |
3876 |
WorkList.push_back({FirstPhi, SecondPhi}); |
| 3877 |
} |
3877 |
} |
| 3878 |
} |
3878 |
} |
| 3879 |
return true; |
3879 |
return true; |
| 3880 |
} |
3880 |
} |
| 3881 |
|
3881 |
|
| 3882 |
/// For the given set of PHI nodes (in the SimplificationTracker) try |
3882 |
/// For the given set of PHI nodes (in the SimplificationTracker) try |
| 3883 |
/// to find their equivalents. |
3883 |
/// to find their equivalents. |
| 3884 |
/// Returns false if this matching fails and creation of new Phi is disabled. |
3884 |
/// Returns false if this matching fails and creation of new Phi is disabled. |
| 3885 |
bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes, |
3885 |
bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes, |
| 3886 |
unsigned &PhiNotMatchedCount) { |
3886 |
unsigned &PhiNotMatchedCount) { |
| 3887 |
// Matched and PhiNodesToMatch iterate their elements in a deterministic |
3887 |
// Matched and PhiNodesToMatch iterate their elements in a deterministic |
| 3888 |
// order, so the replacements (ReplacePhi) are also done in a deterministic |
3888 |
// order, so the replacements (ReplacePhi) are also done in a deterministic |
| 3889 |
// order. |
3889 |
// order. |
| 3890 |
SmallSetVector Matched; |
3890 |
SmallSetVector Matched; |
| 3891 |
SmallPtrSet WillNotMatch; |
3891 |
SmallPtrSet WillNotMatch; |
| 3892 |
PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes(); |
3892 |
PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes(); |
| 3893 |
while (PhiNodesToMatch.size()) { |
3893 |
while (PhiNodesToMatch.size()) { |
| 3894 |
PHINode *PHI = *PhiNodesToMatch.begin(); |
3894 |
PHINode *PHI = *PhiNodesToMatch.begin(); |
| 3895 |
|
3895 |
|
| 3896 |
// Add us, if no Phi nodes in the basic block we do not match. |
3896 |
// Add us, if no Phi nodes in the basic block we do not match. |
| 3897 |
WillNotMatch.clear(); |
3897 |
WillNotMatch.clear(); |
| 3898 |
WillNotMatch.insert(PHI); |
3898 |
WillNotMatch.insert(PHI); |
| 3899 |
|
3899 |
|
| 3900 |
// Traverse all Phis until we found equivalent or fail to do that. |
3900 |
// Traverse all Phis until we found equivalent or fail to do that. |
| 3901 |
bool IsMatched = false; |
3901 |
bool IsMatched = false; |
| 3902 |
for (auto &P : PHI->getParent()->phis()) { |
3902 |
for (auto &P : PHI->getParent()->phis()) { |
| 3903 |
// Skip new Phi nodes. |
3903 |
// Skip new Phi nodes. |
| 3904 |
if (PhiNodesToMatch.count(&P)) |
3904 |
if (PhiNodesToMatch.count(&P)) |
| 3905 |
continue; |
3905 |
continue; |
| 3906 |
if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) |
3906 |
if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) |
| 3907 |
break; |
3907 |
break; |
| 3908 |
// If it does not match, collect all Phi nodes from matcher. |
3908 |
// If it does not match, collect all Phi nodes from matcher. |
| 3909 |
// if we end up with no match, them all these Phi nodes will not match |
3909 |
// if we end up with no match, them all these Phi nodes will not match |
| 3910 |
// later. |
3910 |
// later. |
| 3911 |
for (auto M : Matched) |
3911 |
for (auto M : Matched) |
| 3912 |
WillNotMatch.insert(M.first); |
3912 |
WillNotMatch.insert(M.first); |
| 3913 |
Matched.clear(); |
3913 |
Matched.clear(); |
| 3914 |
} |
3914 |
} |
| 3915 |
if (IsMatched) { |
3915 |
if (IsMatched) { |
| 3916 |
// Replace all matched values and erase them. |
3916 |
// Replace all matched values and erase them. |
| 3917 |
for (auto MV : Matched) |
3917 |
for (auto MV : Matched) |
| 3918 |
ST.ReplacePhi(MV.first, MV.second); |
3918 |
ST.ReplacePhi(MV.first, MV.second); |
| 3919 |
Matched.clear(); |
3919 |
Matched.clear(); |
| 3920 |
continue; |
3920 |
continue; |
| 3921 |
} |
3921 |
} |
| 3922 |
// If we are not allowed to create new nodes then bail out. |
3922 |
// If we are not allowed to create new nodes then bail out. |
| 3923 |
if (!AllowNewPhiNodes) |
3923 |
if (!AllowNewPhiNodes) |
| 3924 |
return false; |
3924 |
return false; |
| 3925 |
// Just remove all seen values in matcher. They will not match anything. |
3925 |
// Just remove all seen values in matcher. They will not match anything. |
| 3926 |
PhiNotMatchedCount += WillNotMatch.size(); |
3926 |
PhiNotMatchedCount += WillNotMatch.size(); |
| 3927 |
for (auto *P : WillNotMatch) |
3927 |
for (auto *P : WillNotMatch) |
| 3928 |
PhiNodesToMatch.erase(P); |
3928 |
PhiNodesToMatch.erase(P); |
| 3929 |
} |
3929 |
} |
| 3930 |
return true; |
3930 |
return true; |
| 3931 |
} |
3931 |
} |
| 3932 |
/// Fill the placeholders with values from predecessors and simplify them. |
3932 |
/// Fill the placeholders with values from predecessors and simplify them. |
| 3933 |
void FillPlaceholders(FoldAddrToValueMapping &Map, |
3933 |
void FillPlaceholders(FoldAddrToValueMapping &Map, |
| 3934 |
SmallVectorImpl &TraverseOrder, |
3934 |
SmallVectorImpl &TraverseOrder, |
| 3935 |
SimplificationTracker &ST) { |
3935 |
SimplificationTracker &ST) { |
| 3936 |
while (!TraverseOrder.empty()) { |
3936 |
while (!TraverseOrder.empty()) { |
| 3937 |
Value *Current = TraverseOrder.pop_back_val(); |
3937 |
Value *Current = TraverseOrder.pop_back_val(); |
| 3938 |
assert(Map.contains(Current) && "No node to fill!!!"); |
3938 |
assert(Map.contains(Current) && "No node to fill!!!"); |
| 3939 |
Value *V = Map[Current]; |
3939 |
Value *V = Map[Current]; |
| 3940 |
|
3940 |
|
| 3941 |
if (SelectInst *Select = dyn_cast(V)) { |
3941 |
if (SelectInst *Select = dyn_cast(V)) { |
| 3942 |
// CurrentValue also must be Select. |
3942 |
// CurrentValue also must be Select. |
| 3943 |
auto *CurrentSelect = cast(Current); |
3943 |
auto *CurrentSelect = cast(Current); |
| 3944 |
auto *TrueValue = CurrentSelect->getTrueValue(); |
3944 |
auto *TrueValue = CurrentSelect->getTrueValue(); |
| 3945 |
assert(Map.contains(TrueValue) && "No True Value!"); |
3945 |
assert(Map.contains(TrueValue) && "No True Value!"); |
| 3946 |
Select->setTrueValue(ST.Get(Map[TrueValue])); |
3946 |
Select->setTrueValue(ST.Get(Map[TrueValue])); |
| 3947 |
auto *FalseValue = CurrentSelect->getFalseValue(); |
3947 |
auto *FalseValue = CurrentSelect->getFalseValue(); |
| 3948 |
assert(Map.contains(FalseValue) && "No False Value!"); |
3948 |
assert(Map.contains(FalseValue) && "No False Value!"); |
| 3949 |
Select->setFalseValue(ST.Get(Map[FalseValue])); |
3949 |
Select->setFalseValue(ST.Get(Map[FalseValue])); |
| 3950 |
} else { |
3950 |
} else { |
| 3951 |
// Must be a Phi node then. |
3951 |
// Must be a Phi node then. |
| 3952 |
auto *PHI = cast(V); |
3952 |
auto *PHI = cast(V); |
| 3953 |
// Fill the Phi node with values from predecessors. |
3953 |
// Fill the Phi node with values from predecessors. |
| 3954 |
for (auto *B : predecessors(PHI->getParent())) { |
3954 |
for (auto *B : predecessors(PHI->getParent())) { |
| 3955 |
Value *PV = cast(Current)->getIncomingValueForBlock(B); |
3955 |
Value *PV = cast(Current)->getIncomingValueForBlock(B); |
| 3956 |
assert(Map.contains(PV) && "No predecessor Value!"); |
3956 |
assert(Map.contains(PV) && "No predecessor Value!"); |
| 3957 |
PHI->addIncoming(ST.Get(Map[PV]), B); |
3957 |
PHI->addIncoming(ST.Get(Map[PV]), B); |
| 3958 |
} |
3958 |
} |
| 3959 |
} |
3959 |
} |
| 3960 |
Map[Current] = ST.Simplify(V); |
3960 |
Map[Current] = ST.Simplify(V); |
| 3961 |
} |
3961 |
} |
| 3962 |
} |
3962 |
} |
| 3963 |
|
3963 |
|
| 3964 |
/// Starting from original value recursively iterates over def-use chain up to |
3964 |
/// Starting from original value recursively iterates over def-use chain up to |
| 3965 |
/// known ending values represented in a map. For each traversed phi/select |
3965 |
/// known ending values represented in a map. For each traversed phi/select |
| 3966 |
/// inserts a placeholder Phi or Select. |
3966 |
/// inserts a placeholder Phi or Select. |
| 3967 |
/// Reports all new created Phi/Select nodes by adding them to set. |
3967 |
/// Reports all new created Phi/Select nodes by adding them to set. |
| 3968 |
/// Also reports and order in what values have been traversed. |
3968 |
/// Also reports and order in what values have been traversed. |
| 3969 |
void InsertPlaceholders(FoldAddrToValueMapping &Map, |
3969 |
void InsertPlaceholders(FoldAddrToValueMapping &Map, |
| 3970 |
SmallVectorImpl &TraverseOrder, |
3970 |
SmallVectorImpl &TraverseOrder, |
| 3971 |
SimplificationTracker &ST) { |
3971 |
SimplificationTracker &ST) { |
| 3972 |
SmallVector Worklist; |
3972 |
SmallVector Worklist; |
| 3973 |
assert((isa(Original) || isa(Original)) && |
3973 |
assert((isa(Original) || isa(Original)) && |
| 3974 |
"Address must be a Phi or Select node"); |
3974 |
"Address must be a Phi or Select node"); |
| 3975 |
auto *Dummy = PoisonValue::get(CommonType); |
3975 |
auto *Dummy = PoisonValue::get(CommonType); |
| 3976 |
Worklist.push_back(Original); |
3976 |
Worklist.push_back(Original); |
| 3977 |
while (!Worklist.empty()) { |
3977 |
while (!Worklist.empty()) { |
| 3978 |
Value *Current = Worklist.pop_back_val(); |
3978 |
Value *Current = Worklist.pop_back_val(); |
| 3979 |
// if it is already visited or it is an ending value then skip it. |
3979 |
// if it is already visited or it is an ending value then skip it. |
| 3980 |
if (Map.contains(Current)) |
3980 |
if (Map.contains(Current)) |
| 3981 |
continue; |
3981 |
continue; |
| 3982 |
TraverseOrder.push_back(Current); |
3982 |
TraverseOrder.push_back(Current); |
| 3983 |
|
3983 |
|
| 3984 |
// CurrentValue must be a Phi node or select. All others must be covered |
3984 |
// CurrentValue must be a Phi node or select. All others must be covered |
| 3985 |
// by anchors. |
3985 |
// by anchors. |
| 3986 |
if (SelectInst *CurrentSelect = dyn_cast(Current)) { |
3986 |
if (SelectInst *CurrentSelect = dyn_cast(Current)) { |
| 3987 |
// Is it OK to get metadata from OrigSelect?! |
3987 |
// Is it OK to get metadata from OrigSelect?! |
| 3988 |
// Create a Select placeholder with dummy value. |
3988 |
// Create a Select placeholder with dummy value. |
| 3989 |
SelectInst *Select = SelectInst::Create( |
3989 |
SelectInst *Select = SelectInst::Create( |
| 3990 |
CurrentSelect->getCondition(), Dummy, Dummy, |
3990 |
CurrentSelect->getCondition(), Dummy, Dummy, |
| 3991 |
CurrentSelect->getName(), CurrentSelect, CurrentSelect); |
3991 |
CurrentSelect->getName(), CurrentSelect, CurrentSelect); |
| 3992 |
Map[Current] = Select; |
3992 |
Map[Current] = Select; |
| 3993 |
ST.insertNewSelect(Select); |
3993 |
ST.insertNewSelect(Select); |
| 3994 |
// We are interested in True and False values. |
3994 |
// We are interested in True and False values. |
| 3995 |
Worklist.push_back(CurrentSelect->getTrueValue()); |
3995 |
Worklist.push_back(CurrentSelect->getTrueValue()); |
| 3996 |
Worklist.push_back(CurrentSelect->getFalseValue()); |
3996 |
Worklist.push_back(CurrentSelect->getFalseValue()); |
| 3997 |
} else { |
3997 |
} else { |
| 3998 |
// It must be a Phi node then. |
3998 |
// It must be a Phi node then. |
| 3999 |
PHINode *CurrentPhi = cast(Current); |
3999 |
PHINode *CurrentPhi = cast(Current); |
| 4000 |
unsigned PredCount = CurrentPhi->getNumIncomingValues(); |
4000 |
unsigned PredCount = CurrentPhi->getNumIncomingValues(); |
| 4001 |
PHINode *PHI = |
4001 |
PHINode *PHI = |
| 4002 |
PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi); |
4002 |
PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi); |
| 4003 |
Map[Current] = PHI; |
4003 |
Map[Current] = PHI; |
| 4004 |
ST.insertNewPhi(PHI); |
4004 |
ST.insertNewPhi(PHI); |
| 4005 |
append_range(Worklist, CurrentPhi->incoming_values()); |
4005 |
append_range(Worklist, CurrentPhi->incoming_values()); |
| 4006 |
} |
4006 |
} |
| 4007 |
} |
4007 |
} |
| 4008 |
} |
4008 |
} |
| 4009 |
|
4009 |
|
| 4010 |
bool addrModeCombiningAllowed() { |
4010 |
bool addrModeCombiningAllowed() { |
| 4011 |
if (DisableComplexAddrModes) |
4011 |
if (DisableComplexAddrModes) |
| 4012 |
return false; |
4012 |
return false; |
| 4013 |
switch (DifferentField) { |
4013 |
switch (DifferentField) { |
| 4014 |
default: |
4014 |
default: |
| 4015 |
return false; |
4015 |
return false; |
| 4016 |
case ExtAddrMode::BaseRegField: |
4016 |
case ExtAddrMode::BaseRegField: |
| 4017 |
return AddrSinkCombineBaseReg; |
4017 |
return AddrSinkCombineBaseReg; |
| 4018 |
case ExtAddrMode::BaseGVField: |
4018 |
case ExtAddrMode::BaseGVField: |
| 4019 |
return AddrSinkCombineBaseGV; |
4019 |
return AddrSinkCombineBaseGV; |
| 4020 |
case ExtAddrMode::BaseOffsField: |
4020 |
case ExtAddrMode::BaseOffsField: |
| 4021 |
return AddrSinkCombineBaseOffs; |
4021 |
return AddrSinkCombineBaseOffs; |
| 4022 |
case ExtAddrMode::ScaledRegField: |
4022 |
case ExtAddrMode::ScaledRegField: |
| 4023 |
return AddrSinkCombineScaledReg; |
4023 |
return AddrSinkCombineScaledReg; |
| 4024 |
} |
4024 |
} |
| 4025 |
} |
4025 |
} |
| 4026 |
}; |
4026 |
}; |
| 4027 |
} // end anonymous namespace |
4027 |
} // end anonymous namespace |
| 4028 |
|
4028 |
|
| 4029 |
/// Try adding ScaleReg*Scale to the current addressing mode. |
4029 |
/// Try adding ScaleReg*Scale to the current addressing mode. |
| 4030 |
/// Return true and update AddrMode if this addr mode is legal for the target, |
4030 |
/// Return true and update AddrMode if this addr mode is legal for the target, |
| 4031 |
/// false if not. |
4031 |
/// false if not. |
| 4032 |
bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, |
4032 |
bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, |
| 4033 |
unsigned Depth) { |
4033 |
unsigned Depth) { |
| 4034 |
// If Scale is 1, then this is the same as adding ScaleReg to the addressing |
4034 |
// If Scale is 1, then this is the same as adding ScaleReg to the addressing |
| 4035 |
// mode. Just process that directly. |
4035 |
// mode. Just process that directly. |
| 4036 |
if (Scale == 1) |
4036 |
if (Scale == 1) |
| 4037 |
return matchAddr(ScaleReg, Depth); |
4037 |
return matchAddr(ScaleReg, Depth); |
| 4038 |
|
4038 |
|
| 4039 |
// If the scale is 0, it takes nothing to add this. |
4039 |
// If the scale is 0, it takes nothing to add this. |
| 4040 |
if (Scale == 0) |
4040 |
if (Scale == 0) |
| 4041 |
return true; |
4041 |
return true; |
| 4042 |
|
4042 |
|
| 4043 |
// If we already have a scale of this value, we can add to it, otherwise, we |
4043 |
// If we already have a scale of this value, we can add to it, otherwise, we |
| 4044 |
// need an available scale field. |
4044 |
// need an available scale field. |
| 4045 |
if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) |
4045 |
if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) |
| 4046 |
return false; |
4046 |
return false; |
| 4047 |
|
4047 |
|
| 4048 |
ExtAddrMode TestAddrMode = AddrMode; |
4048 |
ExtAddrMode TestAddrMode = AddrMode; |
| 4049 |
|
4049 |
|
| 4050 |
// Add scale to turn X*4+X*3 -> X*7. This could also do things like |
4050 |
// Add scale to turn X*4+X*3 -> X*7. This could also do things like |
| 4051 |
// [A+B + A*7] -> [B+A*8]. |
4051 |
// [A+B + A*7] -> [B+A*8]. |
| 4052 |
TestAddrMode.Scale += Scale; |
4052 |
TestAddrMode.Scale += Scale; |
| 4053 |
TestAddrMode.ScaledReg = ScaleReg; |
4053 |
TestAddrMode.ScaledReg = ScaleReg; |
| 4054 |
|
4054 |
|
| 4055 |
// If the new address isn't legal, bail out. |
4055 |
// If the new address isn't legal, bail out. |
| 4056 |
if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) |
4056 |
if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) |
| 4057 |
return false; |
4057 |
return false; |
| 4058 |
|
4058 |
|
| 4059 |
// It was legal, so commit it. |
4059 |
// It was legal, so commit it. |
| 4060 |
AddrMode = TestAddrMode; |
4060 |
AddrMode = TestAddrMode; |
| 4061 |
|
4061 |
|
| 4062 |
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now |
4062 |
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now |
| 4063 |
// to see if ScaleReg is actually X+C. If so, we can turn this into adding |
4063 |
// to see if ScaleReg is actually X+C. If so, we can turn this into adding |
| 4064 |
// X*Scale + C*Scale to addr mode. If we found available IV increment, do not |
4064 |
// X*Scale + C*Scale to addr mode. If we found available IV increment, do not |
| 4065 |
// go any further: we can reuse it and cannot eliminate it. |
4065 |
// go any further: we can reuse it and cannot eliminate it. |
| 4066 |
ConstantInt *CI = nullptr; |
4066 |
ConstantInt *CI = nullptr; |
| 4067 |
Value *AddLHS = nullptr; |
4067 |
Value *AddLHS = nullptr; |
| 4068 |
if (isa(ScaleReg) && // not a constant expr. |
4068 |
if (isa(ScaleReg) && // not a constant expr. |
| 4069 |
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) && |
4069 |
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) && |
| 4070 |
!isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) { |
4070 |
!isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) { |
| 4071 |
TestAddrMode.InBounds = false; |
4071 |
TestAddrMode.InBounds = false; |
| 4072 |
TestAddrMode.ScaledReg = AddLHS; |
4072 |
TestAddrMode.ScaledReg = AddLHS; |
| 4073 |
TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale; |
4073 |
TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale; |
| 4074 |
|
4074 |
|
| 4075 |
// If this addressing mode is legal, commit it and remember that we folded |
4075 |
// If this addressing mode is legal, commit it and remember that we folded |
| 4076 |
// this instruction. |
4076 |
// this instruction. |
| 4077 |
if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { |
4077 |
if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { |
| 4078 |
AddrModeInsts.push_back(cast(ScaleReg)); |
4078 |
AddrModeInsts.push_back(cast(ScaleReg)); |
| 4079 |
AddrMode = TestAddrMode; |
4079 |
AddrMode = TestAddrMode; |
| 4080 |
return true; |
4080 |
return true; |
| 4081 |
} |
4081 |
} |
| 4082 |
// Restore status quo. |
4082 |
// Restore status quo. |
| 4083 |
TestAddrMode = AddrMode; |
4083 |
TestAddrMode = AddrMode; |
| 4084 |
} |
4084 |
} |
| 4085 |
|
4085 |
|
| 4086 |
// If this is an add recurrence with a constant step, return the increment |
4086 |
// If this is an add recurrence with a constant step, return the increment |
| 4087 |
// instruction and the canonicalized step. |
4087 |
// instruction and the canonicalized step. |
| 4088 |
auto GetConstantStep = |
4088 |
auto GetConstantStep = |
| 4089 |
[this](const Value *V) -> std::optional> { |
4089 |
[this](const Value *V) -> std::optional> { |
| 4090 |
auto *PN = dyn_cast(V); |
4090 |
auto *PN = dyn_cast(V); |
| 4091 |
if (!PN) |
4091 |
if (!PN) |
| 4092 |
return std::nullopt; |
4092 |
return std::nullopt; |
| 4093 |
auto IVInc = getIVIncrement(PN, &LI); |
4093 |
auto IVInc = getIVIncrement(PN, &LI); |
| 4094 |
if (!IVInc) |
4094 |
if (!IVInc) |
| 4095 |
return std::nullopt; |
4095 |
return std::nullopt; |
| 4096 |
// TODO: The result of the intrinsics above is two-complement. However when |
4096 |
// TODO: The result of the intrinsics above is two-complement. However when |
| 4097 |
// IV inc is expressed as add or sub, iv.next is potentially a poison value. |
4097 |
// IV inc is expressed as add or sub, iv.next is potentially a poison value. |
| 4098 |
// If it has nuw or nsw flags, we need to make sure that these flags are |
4098 |
// If it has nuw or nsw flags, we need to make sure that these flags are |
| 4099 |
// inferrable at the point of memory instruction. Otherwise we are replacing |
4099 |
// inferrable at the point of memory instruction. Otherwise we are replacing |
| 4100 |
// well-defined two-complement computation with poison. Currently, to avoid |
4100 |
// well-defined two-complement computation with poison. Currently, to avoid |
| 4101 |
// potentially complex analysis needed to prove this, we reject such cases. |
4101 |
// potentially complex analysis needed to prove this, we reject such cases. |
| 4102 |
if (auto *OIVInc = dyn_cast(IVInc->first)) |
4102 |
if (auto *OIVInc = dyn_cast(IVInc->first)) |
| 4103 |
if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) |
4103 |
if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) |
| 4104 |
return std::nullopt; |
4104 |
return std::nullopt; |
| 4105 |
if (auto *ConstantStep = dyn_cast(IVInc->second)) |
4105 |
if (auto *ConstantStep = dyn_cast(IVInc->second)) |
| 4106 |
return std::make_pair(IVInc->first, ConstantStep->getValue()); |
4106 |
return std::make_pair(IVInc->first, ConstantStep->getValue()); |
| 4107 |
return std::nullopt; |
4107 |
return std::nullopt; |
| 4108 |
}; |
4108 |
}; |
| 4109 |
|
4109 |
|
| 4110 |
// Try to account for the following special case: |
4110 |
// Try to account for the following special case: |
| 4111 |
// 1. ScaleReg is an inductive variable; |
4111 |
// 1. ScaleReg is an inductive variable; |
| 4112 |
// 2. We use it with non-zero offset; |
4112 |
// 2. We use it with non-zero offset; |
| 4113 |
// 3. IV's increment is available at the point of memory instruction. |
4113 |
// 3. IV's increment is available at the point of memory instruction. |
| 4114 |
// |
4114 |
// |
| 4115 |
// In this case, we may reuse the IV increment instead of the IV Phi to |
4115 |
// In this case, we may reuse the IV increment instead of the IV Phi to |
| 4116 |
// achieve the following advantages: |
4116 |
// achieve the following advantages: |
| 4117 |
// 1. If IV step matches the offset, we will have no need in the offset; |
4117 |
// 1. If IV step matches the offset, we will have no need in the offset; |
| 4118 |
// 2. Even if they don't match, we will reduce the overlap of living IV |
4118 |
// 2. Even if they don't match, we will reduce the overlap of living IV |
| 4119 |
// and IV increment, that will potentially lead to better register |
4119 |
// and IV increment, that will potentially lead to better register |
| 4120 |
// assignment. |
4120 |
// assignment. |
| 4121 |
if (AddrMode.BaseOffs) { |
4121 |
if (AddrMode.BaseOffs) { |
| 4122 |
if (auto IVStep = GetConstantStep(ScaleReg)) { |
4122 |
if (auto IVStep = GetConstantStep(ScaleReg)) { |
| 4123 |
Instruction *IVInc = IVStep->first; |
4123 |
Instruction *IVInc = IVStep->first; |
| 4124 |
// The following assert is important to ensure a lack of infinite loops. |
4124 |
// The following assert is important to ensure a lack of infinite loops. |
| 4125 |
// This transforms is (intentionally) the inverse of the one just above. |
4125 |
// This transforms is (intentionally) the inverse of the one just above. |
| 4126 |
// If they don't agree on the definition of an increment, we'd alternate |
4126 |
// If they don't agree on the definition of an increment, we'd alternate |
| 4127 |
// back and forth indefinitely. |
4127 |
// back and forth indefinitely. |
| 4128 |
assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep"); |
4128 |
assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep"); |
| 4129 |
APInt Step = IVStep->second; |
4129 |
APInt Step = IVStep->second; |
| 4130 |
APInt Offset = Step * AddrMode.Scale; |
4130 |
APInt Offset = Step * AddrMode.Scale; |
| 4131 |
if (Offset.isSignedIntN(64)) { |
4131 |
if (Offset.isSignedIntN(64)) { |
| 4132 |
TestAddrMode.InBounds = false; |
4132 |
TestAddrMode.InBounds = false; |
| 4133 |
TestAddrMode.ScaledReg = IVInc; |
4133 |
TestAddrMode.ScaledReg = IVInc; |
| 4134 |
TestAddrMode.BaseOffs -= Offset.getLimitedValue(); |
4134 |
TestAddrMode.BaseOffs -= Offset.getLimitedValue(); |
| 4135 |
// If this addressing mode is legal, commit it.. |
4135 |
// If this addressing mode is legal, commit it.. |
| 4136 |
// (Note that we defer the (expensive) domtree base legality check |
4136 |
// (Note that we defer the (expensive) domtree base legality check |
| 4137 |
// to the very last possible point.) |
4137 |
// to the very last possible point.) |
| 4138 |
if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) && |
4138 |
if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) && |
| 4139 |
getDTFn().dominates(IVInc, MemoryInst)) { |
4139 |
getDTFn().dominates(IVInc, MemoryInst)) { |
| 4140 |
AddrModeInsts.push_back(cast(IVInc)); |
4140 |
AddrModeInsts.push_back(cast(IVInc)); |
| 4141 |
AddrMode = TestAddrMode; |
4141 |
AddrMode = TestAddrMode; |
| 4142 |
return true; |
4142 |
return true; |
| 4143 |
} |
4143 |
} |
| 4144 |
// Restore status quo. |
4144 |
// Restore status quo. |
| 4145 |
TestAddrMode = AddrMode; |
4145 |
TestAddrMode = AddrMode; |
| 4146 |
} |
4146 |
} |
| 4147 |
} |
4147 |
} |
| 4148 |
} |
4148 |
} |
| 4149 |
|
4149 |
|
| 4150 |
// Otherwise, just return what we have. |
4150 |
// Otherwise, just return what we have. |
| 4151 |
return true; |
4151 |
return true; |
| 4152 |
} |
4152 |
} |
| 4153 |
|
4153 |
|
| 4154 |
/// This is a little filter, which returns true if an addressing computation |
4154 |
/// This is a little filter, which returns true if an addressing computation |
| 4155 |
/// involving I might be folded into a load/store accessing it. |
4155 |
/// involving I might be folded into a load/store accessing it. |
| 4156 |
/// This doesn't need to be perfect, but needs to accept at least |
4156 |
/// This doesn't need to be perfect, but needs to accept at least |
| 4157 |
/// the set of instructions that MatchOperationAddr can. |
4157 |
/// the set of instructions that MatchOperationAddr can. |
| 4158 |
static bool MightBeFoldableInst(Instruction *I) { |
4158 |
static bool MightBeFoldableInst(Instruction *I) { |
| 4159 |
switch (I->getOpcode()) { |
4159 |
switch (I->getOpcode()) { |
| 4160 |
case Instruction::BitCast: |
4160 |
case Instruction::BitCast: |
| 4161 |
case Instruction::AddrSpaceCast: |
4161 |
case Instruction::AddrSpaceCast: |
| 4162 |
// Don't touch identity bitcasts. |
4162 |
// Don't touch identity bitcasts. |
| 4163 |
if (I->getType() == I->getOperand(0)->getType()) |
4163 |
if (I->getType() == I->getOperand(0)->getType()) |
| 4164 |
return false; |
4164 |
return false; |
| 4165 |
return I->getType()->isIntOrPtrTy(); |
4165 |
return I->getType()->isIntOrPtrTy(); |
| 4166 |
case Instruction::PtrToInt: |
4166 |
case Instruction::PtrToInt: |
| 4167 |
// PtrToInt is always a noop, as we know that the int type is pointer sized. |
4167 |
// PtrToInt is always a noop, as we know that the int type is pointer sized. |
| 4168 |
return true; |
4168 |
return true; |
| 4169 |
case Instruction::IntToPtr: |
4169 |
case Instruction::IntToPtr: |
| 4170 |
// We know the input is intptr_t, so this is foldable. |
4170 |
// We know the input is intptr_t, so this is foldable. |
| 4171 |
return true; |
4171 |
return true; |
| 4172 |
case Instruction::Add: |
4172 |
case Instruction::Add: |
| 4173 |
return true; |
4173 |
return true; |
| 4174 |
case Instruction::Mul: |
4174 |
case Instruction::Mul: |
| 4175 |
case Instruction::Shl: |
4175 |
case Instruction::Shl: |
| 4176 |
// Can only handle X*C and X << C. |
4176 |
// Can only handle X*C and X << C. |
| 4177 |
return isa(I->getOperand(1)); |
4177 |
return isa(I->getOperand(1)); |
| 4178 |
case Instruction::GetElementPtr: |
4178 |
case Instruction::GetElementPtr: |
| 4179 |
return true; |
4179 |
return true; |
| 4180 |
default: |
4180 |
default: |
| 4181 |
return false; |
4181 |
return false; |
| 4182 |
} |
4182 |
} |
| 4183 |
} |
4183 |
} |
| 4184 |
|
4184 |
|
| 4185 |
/// Check whether or not \p Val is a legal instruction for \p TLI. |
4185 |
/// Check whether or not \p Val is a legal instruction for \p TLI. |
| 4186 |
/// \note \p Val is assumed to be the product of some type promotion. |
4186 |
/// \note \p Val is assumed to be the product of some type promotion. |
| 4187 |
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed |
4187 |
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed |
| 4188 |
/// to be legal, as the non-promoted value would have had the same state. |
4188 |
/// to be legal, as the non-promoted value would have had the same state. |
| 4189 |
static bool isPromotedInstructionLegal(const TargetLowering &TLI, |
4189 |
static bool isPromotedInstructionLegal(const TargetLowering &TLI, |
| 4190 |
const DataLayout &DL, Value *Val) { |
4190 |
const DataLayout &DL, Value *Val) { |
| 4191 |
Instruction *PromotedInst = dyn_cast(Val); |
4191 |
Instruction *PromotedInst = dyn_cast(Val); |
| 4192 |
if (!PromotedInst) |
4192 |
if (!PromotedInst) |
| 4193 |
return false; |
4193 |
return false; |
| 4194 |
int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); |
4194 |
int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); |
| 4195 |
// If the ISDOpcode is undefined, it was undefined before the promotion. |
4195 |
// If the ISDOpcode is undefined, it was undefined before the promotion. |
| 4196 |
if (!ISDOpcode) |
4196 |
if (!ISDOpcode) |
| 4197 |
return true; |
4197 |
return true; |
| 4198 |
// Otherwise, check if the promoted instruction is legal or not. |
4198 |
// Otherwise, check if the promoted instruction is legal or not. |
| 4199 |
return TLI.isOperationLegalOrCustom( |
4199 |
return TLI.isOperationLegalOrCustom( |
| 4200 |
ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); |
4200 |
ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); |
| 4201 |
} |
4201 |
} |
| 4202 |
|
4202 |
|
| 4203 |
namespace { |
4203 |
namespace { |
| 4204 |
|
4204 |
|
| 4205 |
/// Hepler class to perform type promotion. |
4205 |
/// Hepler class to perform type promotion. |
| 4206 |
class TypePromotionHelper { |
4206 |
class TypePromotionHelper { |
| 4207 |
/// Utility function to add a promoted instruction \p ExtOpnd to |
4207 |
/// Utility function to add a promoted instruction \p ExtOpnd to |
| 4208 |
/// \p PromotedInsts and record the type of extension we have seen. |
4208 |
/// \p PromotedInsts and record the type of extension we have seen. |
| 4209 |
static void addPromotedInst(InstrToOrigTy &PromotedInsts, |
4209 |
static void addPromotedInst(InstrToOrigTy &PromotedInsts, |
| 4210 |
Instruction *ExtOpnd, bool IsSExt) { |
4210 |
Instruction *ExtOpnd, bool IsSExt) { |
| 4211 |
ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; |
4211 |
ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; |
| 4212 |
InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd); |
4212 |
InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd); |
| 4213 |
if (It != PromotedInsts.end()) { |
4213 |
if (It != PromotedInsts.end()) { |
| 4214 |
// If the new extension is same as original, the information in |
4214 |
// If the new extension is same as original, the information in |
| 4215 |
// PromotedInsts[ExtOpnd] is still correct. |
4215 |
// PromotedInsts[ExtOpnd] is still correct. |
| 4216 |
if (It->second.getInt() == ExtTy) |
4216 |
if (It->second.getInt() == ExtTy) |
| 4217 |
return; |
4217 |
return; |
| 4218 |
|
4218 |
|
| 4219 |
// Now the new extension is different from old extension, we make |
4219 |
// Now the new extension is different from old extension, we make |
| 4220 |
// the type information invalid by setting extension type to |
4220 |
// the type information invalid by setting extension type to |
| 4221 |
// BothExtension. |
4221 |
// BothExtension. |
| 4222 |
ExtTy = BothExtension; |
4222 |
ExtTy = BothExtension; |
| 4223 |
} |
4223 |
} |
| 4224 |
PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy); |
4224 |
PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy); |
| 4225 |
} |
4225 |
} |
| 4226 |
|
4226 |
|
| 4227 |
/// Utility function to query the original type of instruction \p Opnd |
4227 |
/// Utility function to query the original type of instruction \p Opnd |
| 4228 |
/// with a matched extension type. If the extension doesn't match, we |
4228 |
/// with a matched extension type. If the extension doesn't match, we |
| 4229 |
/// cannot use the information we had on the original type. |
4229 |
/// cannot use the information we had on the original type. |
| 4230 |
/// BothExtension doesn't match any extension type. |
4230 |
/// BothExtension doesn't match any extension type. |
| 4231 |
static const Type *getOrigType(const InstrToOrigTy &PromotedInsts, |
4231 |
static const Type *getOrigType(const InstrToOrigTy &PromotedInsts, |
| 4232 |
Instruction *Opnd, bool IsSExt) { |
4232 |
Instruction *Opnd, bool IsSExt) { |
| 4233 |
ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; |
4233 |
ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension; |
| 4234 |
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); |
4234 |
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); |
| 4235 |
if (It != PromotedInsts.end() && It->second.getInt() == ExtTy) |
4235 |
if (It != PromotedInsts.end() && It->second.getInt() == ExtTy) |
| 4236 |
return It->second.getPointer(); |
4236 |
return It->second.getPointer(); |
| 4237 |
return nullptr; |
4237 |
return nullptr; |
| 4238 |
} |
4238 |
} |
| 4239 |
|
4239 |
|
| 4240 |
/// Utility function to check whether or not a sign or zero extension |
4240 |
/// Utility function to check whether or not a sign or zero extension |
| 4241 |
/// of \p Inst with \p ConsideredExtType can be moved through \p Inst by |
4241 |
/// of \p Inst with \p ConsideredExtType can be moved through \p Inst by |
| 4242 |
/// either using the operands of \p Inst or promoting \p Inst. |
4242 |
/// either using the operands of \p Inst or promoting \p Inst. |
| 4243 |
/// The type of the extension is defined by \p IsSExt. |
4243 |
/// The type of the extension is defined by \p IsSExt. |
| 4244 |
/// In other words, check if: |
4244 |
/// In other words, check if: |
| 4245 |
/// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. |
4245 |
/// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. |
| 4246 |
/// #1 Promotion applies: |
4246 |
/// #1 Promotion applies: |
| 4247 |
/// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). |
4247 |
/// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). |
| 4248 |
/// #2 Operand reuses: |
4248 |
/// #2 Operand reuses: |
| 4249 |
/// ext opnd1 to ConsideredExtType. |
4249 |
/// ext opnd1 to ConsideredExtType. |
| 4250 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
4250 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
| 4251 |
static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, |
4251 |
static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, |
| 4252 |
const InstrToOrigTy &PromotedInsts, bool IsSExt); |
4252 |
const InstrToOrigTy &PromotedInsts, bool IsSExt); |
| 4253 |
|
4253 |
|
| 4254 |
/// Utility function to determine if \p OpIdx should be promoted when |
4254 |
/// Utility function to determine if \p OpIdx should be promoted when |
| 4255 |
/// promoting \p Inst. |
4255 |
/// promoting \p Inst. |
| 4256 |
static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { |
4256 |
static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { |
| 4257 |
return !(isa(Inst) && OpIdx == 0); |
4257 |
return !(isa(Inst) && OpIdx == 0); |
| 4258 |
} |
4258 |
} |
| 4259 |
|
4259 |
|
| 4260 |
/// Utility function to promote the operand of \p Ext when this |
4260 |
/// Utility function to promote the operand of \p Ext when this |
| 4261 |
/// operand is a promotable trunc or sext or zext. |
4261 |
/// operand is a promotable trunc or sext or zext. |
| 4262 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
4262 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
| 4263 |
/// \p CreatedInstsCost[out] contains the cost of all instructions |
4263 |
/// \p CreatedInstsCost[out] contains the cost of all instructions |
| 4264 |
/// created to promote the operand of Ext. |
4264 |
/// created to promote the operand of Ext. |
| 4265 |
/// Newly added extensions are inserted in \p Exts. |
4265 |
/// Newly added extensions are inserted in \p Exts. |
| 4266 |
/// Newly added truncates are inserted in \p Truncs. |
4266 |
/// Newly added truncates are inserted in \p Truncs. |
| 4267 |
/// Should never be called directly. |
4267 |
/// Should never be called directly. |
| 4268 |
/// \return The promoted value which is used instead of Ext. |
4268 |
/// \return The promoted value which is used instead of Ext. |
| 4269 |
static Value *promoteOperandForTruncAndAnyExt( |
4269 |
static Value *promoteOperandForTruncAndAnyExt( |
| 4270 |
Instruction *Ext, TypePromotionTransaction &TPT, |
4270 |
Instruction *Ext, TypePromotionTransaction &TPT, |
| 4271 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
4271 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
| 4272 |
SmallVectorImpl *Exts, |
4272 |
SmallVectorImpl *Exts, |
| 4273 |
SmallVectorImpl *Truncs, const TargetLowering &TLI); |
4273 |
SmallVectorImpl *Truncs, const TargetLowering &TLI); |
| 4274 |
|
4274 |
|
| 4275 |
/// Utility function to promote the operand of \p Ext when this |
4275 |
/// Utility function to promote the operand of \p Ext when this |
| 4276 |
/// operand is promotable and is not a supported trunc or sext. |
4276 |
/// operand is promotable and is not a supported trunc or sext. |
| 4277 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
4277 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
| 4278 |
/// \p CreatedInstsCost[out] contains the cost of all the instructions |
4278 |
/// \p CreatedInstsCost[out] contains the cost of all the instructions |
| 4279 |
/// created to promote the operand of Ext. |
4279 |
/// created to promote the operand of Ext. |
| 4280 |
/// Newly added extensions are inserted in \p Exts. |
4280 |
/// Newly added extensions are inserted in \p Exts. |
| 4281 |
/// Newly added truncates are inserted in \p Truncs. |
4281 |
/// Newly added truncates are inserted in \p Truncs. |
| 4282 |
/// Should never be called directly. |
4282 |
/// Should never be called directly. |
| 4283 |
/// \return The promoted value which is used instead of Ext. |
4283 |
/// \return The promoted value which is used instead of Ext. |
| 4284 |
static Value *promoteOperandForOther(Instruction *Ext, |
4284 |
static Value *promoteOperandForOther(Instruction *Ext, |
| 4285 |
TypePromotionTransaction &TPT, |
4285 |
TypePromotionTransaction &TPT, |
| 4286 |
InstrToOrigTy &PromotedInsts, |
4286 |
InstrToOrigTy &PromotedInsts, |
| 4287 |
unsigned &CreatedInstsCost, |
4287 |
unsigned &CreatedInstsCost, |
| 4288 |
SmallVectorImpl *Exts, |
4288 |
SmallVectorImpl *Exts, |
| 4289 |
SmallVectorImpl *Truncs, |
4289 |
SmallVectorImpl *Truncs, |
| 4290 |
const TargetLowering &TLI, bool IsSExt); |
4290 |
const TargetLowering &TLI, bool IsSExt); |
| 4291 |
|
4291 |
|
| 4292 |
/// \see promoteOperandForOther. |
4292 |
/// \see promoteOperandForOther. |
| 4293 |
static Value *signExtendOperandForOther( |
4293 |
static Value *signExtendOperandForOther( |
| 4294 |
Instruction *Ext, TypePromotionTransaction &TPT, |
4294 |
Instruction *Ext, TypePromotionTransaction &TPT, |
| 4295 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
4295 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
| 4296 |
SmallVectorImpl *Exts, |
4296 |
SmallVectorImpl *Exts, |
| 4297 |
SmallVectorImpl *Truncs, const TargetLowering &TLI) { |
4297 |
SmallVectorImpl *Truncs, const TargetLowering &TLI) { |
| 4298 |
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, |
4298 |
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, |
| 4299 |
Exts, Truncs, TLI, true); |
4299 |
Exts, Truncs, TLI, true); |
| 4300 |
} |
4300 |
} |
| 4301 |
|
4301 |
|
| 4302 |
/// \see promoteOperandForOther. |
4302 |
/// \see promoteOperandForOther. |
| 4303 |
static Value *zeroExtendOperandForOther( |
4303 |
static Value *zeroExtendOperandForOther( |
| 4304 |
Instruction *Ext, TypePromotionTransaction &TPT, |
4304 |
Instruction *Ext, TypePromotionTransaction &TPT, |
| 4305 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
4305 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
| 4306 |
SmallVectorImpl *Exts, |
4306 |
SmallVectorImpl *Exts, |
| 4307 |
SmallVectorImpl *Truncs, const TargetLowering &TLI) { |
4307 |
SmallVectorImpl *Truncs, const TargetLowering &TLI) { |
| 4308 |
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, |
4308 |
return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, |
| 4309 |
Exts, Truncs, TLI, false); |
4309 |
Exts, Truncs, TLI, false); |
| 4310 |
} |
4310 |
} |
| 4311 |
|
4311 |
|
| 4312 |
public: |
4312 |
public: |
| 4313 |
/// Type for the utility function that promotes the operand of Ext. |
4313 |
/// Type for the utility function that promotes the operand of Ext. |
| 4314 |
using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT, |
4314 |
using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT, |
| 4315 |
InstrToOrigTy &PromotedInsts, |
4315 |
InstrToOrigTy &PromotedInsts, |
| 4316 |
unsigned &CreatedInstsCost, |
4316 |
unsigned &CreatedInstsCost, |
| 4317 |
SmallVectorImpl *Exts, |
4317 |
SmallVectorImpl *Exts, |
| 4318 |
SmallVectorImpl *Truncs, |
4318 |
SmallVectorImpl *Truncs, |
| 4319 |
const TargetLowering &TLI); |
4319 |
const TargetLowering &TLI); |
| 4320 |
|
4320 |
|
| 4321 |
/// Given a sign/zero extend instruction \p Ext, return the appropriate |
4321 |
/// Given a sign/zero extend instruction \p Ext, return the appropriate |
| 4322 |
/// action to promote the operand of \p Ext instead of using Ext. |
4322 |
/// action to promote the operand of \p Ext instead of using Ext. |
| 4323 |
/// \return NULL if no promotable action is possible with the current |
4323 |
/// \return NULL if no promotable action is possible with the current |
| 4324 |
/// sign extension. |
4324 |
/// sign extension. |
| 4325 |
/// \p InsertedInsts keeps track of all the instructions inserted by the |
4325 |
/// \p InsertedInsts keeps track of all the instructions inserted by the |
| 4326 |
/// other CodeGenPrepare optimizations. This information is important |
4326 |
/// other CodeGenPrepare optimizations. This information is important |
| 4327 |
/// because we do not want to promote these instructions as CodeGenPrepare |
4327 |
/// because we do not want to promote these instructions as CodeGenPrepare |
| 4328 |
/// will reinsert them later. Thus creating an infinite loop: create/remove. |
4328 |
/// will reinsert them later. Thus creating an infinite loop: create/remove. |
| 4329 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
4329 |
/// \p PromotedInsts maps the instructions to their type before promotion. |
| 4330 |
static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts, |
4330 |
static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts, |
| 4331 |
const TargetLowering &TLI, |
4331 |
const TargetLowering &TLI, |
| 4332 |
const InstrToOrigTy &PromotedInsts); |
4332 |
const InstrToOrigTy &PromotedInsts); |
| 4333 |
}; |
4333 |
}; |
| 4334 |
|
4334 |
|
| 4335 |
} // end anonymous namespace |
4335 |
} // end anonymous namespace |
| 4336 |
|
4336 |
|
| 4337 |
bool TypePromotionHelper::canGetThrough(const Instruction *Inst, |
4337 |
bool TypePromotionHelper::canGetThrough(const Instruction *Inst, |
| 4338 |
Type *ConsideredExtType, |
4338 |
Type *ConsideredExtType, |
| 4339 |
const InstrToOrigTy &PromotedInsts, |
4339 |
const InstrToOrigTy &PromotedInsts, |
| 4340 |
bool IsSExt) { |
4340 |
bool IsSExt) { |
| 4341 |
// The promotion helper does not know how to deal with vector types yet. |
4341 |
// The promotion helper does not know how to deal with vector types yet. |
| 4342 |
// To be able to fix that, we would need to fix the places where we |
4342 |
// To be able to fix that, we would need to fix the places where we |
| 4343 |
// statically extend, e.g., constants and such. |
4343 |
// statically extend, e.g., constants and such. |
| 4344 |
if (Inst->getType()->isVectorTy()) |
4344 |
if (Inst->getType()->isVectorTy()) |
| 4345 |
return false; |
4345 |
return false; |
| 4346 |
|
4346 |
|
| 4347 |
// We can always get through zext. |
4347 |
// We can always get through zext. |
| 4348 |
if (isa(Inst)) |
4348 |
if (isa(Inst)) |
| 4349 |
return true; |
4349 |
return true; |
| 4350 |
|
4350 |
|
| 4351 |
// sext(sext) is ok too. |
4351 |
// sext(sext) is ok too. |
| 4352 |
if (IsSExt && isa(Inst)) |
4352 |
if (IsSExt && isa(Inst)) |
| 4353 |
return true; |
4353 |
return true; |
| 4354 |
|
4354 |
|
| 4355 |
// We can get through binary operator, if it is legal. In other words, the |
4355 |
// We can get through binary operator, if it is legal. In other words, the |
| 4356 |
// binary operator must have a nuw or nsw flag. |
4356 |
// binary operator must have a nuw or nsw flag. |
| 4357 |
if (const auto *BinOp = dyn_cast(Inst)) |
4357 |
if (const auto *BinOp = dyn_cast(Inst)) |
| 4358 |
if (isa(BinOp) && |
4358 |
if (isa(BinOp) && |
| 4359 |
((!IsSExt && BinOp->hasNoUnsignedWrap()) || |
4359 |
((!IsSExt && BinOp->hasNoUnsignedWrap()) || |
| 4360 |
(IsSExt && BinOp->hasNoSignedWrap()))) |
4360 |
(IsSExt && BinOp->hasNoSignedWrap()))) |
| 4361 |
return true; |
4361 |
return true; |
| 4362 |
|
4362 |
|
| 4363 |
// ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) |
4363 |
// ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) |
| 4364 |
if ((Inst->getOpcode() == Instruction::And || |
4364 |
if ((Inst->getOpcode() == Instruction::And || |
| 4365 |
Inst->getOpcode() == Instruction::Or)) |
4365 |
Inst->getOpcode() == Instruction::Or)) |
| 4366 |
return true; |
4366 |
return true; |
| 4367 |
|
4367 |
|
| 4368 |
// ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) |
4368 |
// ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) |
| 4369 |
if (Inst->getOpcode() == Instruction::Xor) { |
4369 |
if (Inst->getOpcode() == Instruction::Xor) { |
| 4370 |
// Make sure it is not a NOT. |
4370 |
// Make sure it is not a NOT. |
| 4371 |
if (const auto *Cst = dyn_cast(Inst->getOperand(1))) |
4371 |
if (const auto *Cst = dyn_cast(Inst->getOperand(1))) |
| 4372 |
if (!Cst->getValue().isAllOnes()) |
4372 |
if (!Cst->getValue().isAllOnes()) |
| 4373 |
return true; |
4373 |
return true; |
| 4374 |
} |
4374 |
} |
| 4375 |
|
4375 |
|
| 4376 |
// zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) |
4376 |
// zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) |
| 4377 |
// It may change a poisoned value into a regular value, like |
4377 |
// It may change a poisoned value into a regular value, like |
| 4378 |
// zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12 |
4378 |
// zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12 |
| 4379 |
// poisoned value regular value |
4379 |
// poisoned value regular value |
| 4380 |
// It should be OK since undef covers valid value. |
4380 |
// It should be OK since undef covers valid value. |
| 4381 |
if (Inst->getOpcode() == Instruction::LShr && !IsSExt) |
4381 |
if (Inst->getOpcode() == Instruction::LShr && !IsSExt) |
| 4382 |
return true; |
4382 |
return true; |
| 4383 |
|
4383 |
|
| 4384 |
// and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst) |
4384 |
// and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst) |
| 4385 |
// It may change a poisoned value into a regular value, like |
4385 |
// It may change a poisoned value into a regular value, like |
| 4386 |
// zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12 |
4386 |
// zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12 |
| 4387 |
// poisoned value regular value |
4387 |
// poisoned value regular value |
| 4388 |
// It should be OK since undef covers valid value. |
4388 |
// It should be OK since undef covers valid value. |
| 4389 |
if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { |
4389 |
if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { |
| 4390 |
const auto *ExtInst = cast(*Inst->user_begin()); |
4390 |
const auto *ExtInst = cast(*Inst->user_begin()); |
| 4391 |
if (ExtInst->hasOneUse()) { |
4391 |
if (ExtInst->hasOneUse()) { |
| 4392 |
const auto *AndInst = dyn_cast(*ExtInst->user_begin()); |
4392 |
const auto *AndInst = dyn_cast(*ExtInst->user_begin()); |
| 4393 |
if (AndInst && AndInst->getOpcode() == Instruction::And) { |
4393 |
if (AndInst && AndInst->getOpcode() == Instruction::And) { |
| 4394 |
const auto *Cst = dyn_cast(AndInst->getOperand(1)); |
4394 |
const auto *Cst = dyn_cast(AndInst->getOperand(1)); |
| 4395 |
if (Cst && |
4395 |
if (Cst && |
| 4396 |
Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) |
4396 |
Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) |
| 4397 |
return true; |
4397 |
return true; |
| 4398 |
} |
4398 |
} |
| 4399 |
} |
4399 |
} |
| 4400 |
} |
4400 |
} |
| 4401 |
|
4401 |
|
| 4402 |
// Check if we can do the following simplification. |
4402 |
// Check if we can do the following simplification. |
| 4403 |
// ext(trunc(opnd)) --> ext(opnd) |
4403 |
// ext(trunc(opnd)) --> ext(opnd) |
| 4404 |
if (!isa(Inst)) |
4404 |
if (!isa(Inst)) |
| 4405 |
return false; |
4405 |
return false; |
| 4406 |
|
4406 |
|
| 4407 |
Value *OpndVal = Inst->getOperand(0); |
4407 |
Value *OpndVal = Inst->getOperand(0); |
| 4408 |
// Check if we can use this operand in the extension. |
4408 |
// Check if we can use this operand in the extension. |
| 4409 |
// If the type is larger than the result type of the extension, we cannot. |
4409 |
// If the type is larger than the result type of the extension, we cannot. |
| 4410 |
if (!OpndVal->getType()->isIntegerTy() || |
4410 |
if (!OpndVal->getType()->isIntegerTy() || |
| 4411 |
OpndVal->getType()->getIntegerBitWidth() > |
4411 |
OpndVal->getType()->getIntegerBitWidth() > |
| 4412 |
ConsideredExtType->getIntegerBitWidth()) |
4412 |
ConsideredExtType->getIntegerBitWidth()) |
| 4413 |
return false; |
4413 |
return false; |
| 4414 |
|
4414 |
|
| 4415 |
// If the operand of the truncate is not an instruction, we will not have |
4415 |
// If the operand of the truncate is not an instruction, we will not have |
| 4416 |
// any information on the dropped bits. |
4416 |
// any information on the dropped bits. |
| 4417 |
// (Actually we could for constant but it is not worth the extra logic). |
4417 |
// (Actually we could for constant but it is not worth the extra logic). |
| 4418 |
Instruction *Opnd = dyn_cast(OpndVal); |
4418 |
Instruction *Opnd = dyn_cast(OpndVal); |
| 4419 |
if (!Opnd) |
4419 |
if (!Opnd) |
| 4420 |
return false; |
4420 |
return false; |
| 4421 |
|
4421 |
|
| 4422 |
// Check if the source of the type is narrow enough. |
4422 |
// Check if the source of the type is narrow enough. |
| 4423 |
// I.e., check that trunc just drops extended bits of the same kind of |
4423 |
// I.e., check that trunc just drops extended bits of the same kind of |
| 4424 |
// the extension. |
4424 |
// the extension. |
| 4425 |
// #1 get the type of the operand and check the kind of the extended bits. |
4425 |
// #1 get the type of the operand and check the kind of the extended bits. |
| 4426 |
const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt); |
4426 |
const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt); |
| 4427 |
if (OpndType) |
4427 |
if (OpndType) |
| 4428 |
; |
4428 |
; |
| 4429 |
else if ((IsSExt && isa(Opnd)) || (!IsSExt && isa(Opnd))) |
4429 |
else if ((IsSExt && isa(Opnd)) || (!IsSExt && isa(Opnd))) |
| 4430 |
OpndType = Opnd->getOperand(0)->getType(); |
4430 |
OpndType = Opnd->getOperand(0)->getType(); |
| 4431 |
else |
4431 |
else |
| 4432 |
return false; |
4432 |
return false; |
| 4433 |
|
4433 |
|
| 4434 |
// #2 check that the truncate just drops extended bits. |
4434 |
// #2 check that the truncate just drops extended bits. |
| 4435 |
return Inst->getType()->getIntegerBitWidth() >= |
4435 |
return Inst->getType()->getIntegerBitWidth() >= |
| 4436 |
OpndType->getIntegerBitWidth(); |
4436 |
OpndType->getIntegerBitWidth(); |
| 4437 |
} |
4437 |
} |
| 4438 |
|
4438 |
|
| 4439 |
TypePromotionHelper::Action TypePromotionHelper::getAction( |
4439 |
TypePromotionHelper::Action TypePromotionHelper::getAction( |
| 4440 |
Instruction *Ext, const SetOfInstrs &InsertedInsts, |
4440 |
Instruction *Ext, const SetOfInstrs &InsertedInsts, |
| 4441 |
const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { |
4441 |
const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { |
| 4442 |
assert((isa(Ext) || isa(Ext)) && |
4442 |
assert((isa(Ext) || isa(Ext)) && |
| 4443 |
"Unexpected instruction type"); |
4443 |
"Unexpected instruction type"); |
| 4444 |
Instruction *ExtOpnd = dyn_cast(Ext->getOperand(0)); |
4444 |
Instruction *ExtOpnd = dyn_cast(Ext->getOperand(0)); |
| 4445 |
Type *ExtTy = Ext->getType(); |
4445 |
Type *ExtTy = Ext->getType(); |
| 4446 |
bool IsSExt = isa(Ext); |
4446 |
bool IsSExt = isa(Ext); |
| 4447 |
// If the operand of the extension is not an instruction, we cannot |
4447 |
// If the operand of the extension is not an instruction, we cannot |
| 4448 |
// get through. |
4448 |
// get through. |
| 4449 |
// If it, check we can get through. |
4449 |
// If it, check we can get through. |
| 4450 |
if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) |
4450 |
if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) |
| 4451 |
return nullptr; |
4451 |
return nullptr; |
| 4452 |
|
4452 |
|
| 4453 |
// Do not promote if the operand has been added by codegenprepare. |
4453 |
// Do not promote if the operand has been added by codegenprepare. |
| 4454 |
// Otherwise, it means we are undoing an optimization that is likely to be |
4454 |
// Otherwise, it means we are undoing an optimization that is likely to be |
| 4455 |
// redone, thus causing potential infinite loop. |
4455 |
// redone, thus causing potential infinite loop. |
| 4456 |
if (isa(ExtOpnd) && InsertedInsts.count(ExtOpnd)) |
4456 |
if (isa(ExtOpnd) && InsertedInsts.count(ExtOpnd)) |
| 4457 |
return nullptr; |
4457 |
return nullptr; |
| 4458 |
|
4458 |
|
| 4459 |
// SExt or Trunc instructions. |
4459 |
// SExt or Trunc instructions. |
| 4460 |
// Return the related handler. |
4460 |
// Return the related handler. |
| 4461 |
if (isa(ExtOpnd) || isa(ExtOpnd) || |
4461 |
if (isa(ExtOpnd) || isa(ExtOpnd) || |
| 4462 |
isa(ExtOpnd)) |
4462 |
isa(ExtOpnd)) |
| 4463 |
return promoteOperandForTruncAndAnyExt; |
4463 |
return promoteOperandForTruncAndAnyExt; |
| 4464 |
|
4464 |
|
| 4465 |
// Regular instruction. |
4465 |
// Regular instruction. |
| 4466 |
// Abort early if we will have to insert non-free instructions. |
4466 |
// Abort early if we will have to insert non-free instructions. |
| 4467 |
if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) |
4467 |
if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) |
| 4468 |
return nullptr; |
4468 |
return nullptr; |
| 4469 |
return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; |
4469 |
return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; |
| 4470 |
} |
4470 |
} |
| 4471 |
|
4471 |
|
| 4472 |
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( |
4472 |
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( |
| 4473 |
Instruction *SExt, TypePromotionTransaction &TPT, |
4473 |
Instruction *SExt, TypePromotionTransaction &TPT, |
| 4474 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
4474 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
| 4475 |
SmallVectorImpl *Exts, |
4475 |
SmallVectorImpl *Exts, |
| 4476 |
SmallVectorImpl *Truncs, const TargetLowering &TLI) { |
4476 |
SmallVectorImpl *Truncs, const TargetLowering &TLI) { |
| 4477 |
// By construction, the operand of SExt is an instruction. Otherwise we cannot |
4477 |
// By construction, the operand of SExt is an instruction. Otherwise we cannot |
| 4478 |
// get through it and this method should not be called. |
4478 |
// get through it and this method should not be called. |
| 4479 |
Instruction *SExtOpnd = cast(SExt->getOperand(0)); |
4479 |
Instruction *SExtOpnd = cast(SExt->getOperand(0)); |
| 4480 |
Value *ExtVal = SExt; |
4480 |
Value *ExtVal = SExt; |
| 4481 |
bool HasMergedNonFreeExt = false; |
4481 |
bool HasMergedNonFreeExt = false; |
| 4482 |
if (isa(SExtOpnd)) { |
4482 |
if (isa(SExtOpnd)) { |
| 4483 |
// Replace s|zext(zext(opnd)) |
4483 |
// Replace s|zext(zext(opnd)) |
| 4484 |
// => zext(opnd). |
4484 |
// => zext(opnd). |
| 4485 |
HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); |
4485 |
HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); |
| 4486 |
Value *ZExt = |
4486 |
Value *ZExt = |
| 4487 |
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); |
4487 |
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); |
| 4488 |
TPT.replaceAllUsesWith(SExt, ZExt); |
4488 |
TPT.replaceAllUsesWith(SExt, ZExt); |
| 4489 |
TPT.eraseInstruction(SExt); |
4489 |
TPT.eraseInstruction(SExt); |
| 4490 |
ExtVal = ZExt; |
4490 |
ExtVal = ZExt; |
| 4491 |
} else { |
4491 |
} else { |
| 4492 |
// Replace z|sext(trunc(opnd)) or sext(sext(opnd)) |
4492 |
// Replace z|sext(trunc(opnd)) or sext(sext(opnd)) |
| 4493 |
// => z|sext(opnd). |
4493 |
// => z|sext(opnd). |
| 4494 |
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); |
4494 |
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); |
| 4495 |
} |
4495 |
} |
| 4496 |
CreatedInstsCost = 0; |
4496 |
CreatedInstsCost = 0; |
| 4497 |
|
4497 |
|
| 4498 |
// Remove dead code. |
4498 |
// Remove dead code. |
| 4499 |
if (SExtOpnd->use_empty()) |
4499 |
if (SExtOpnd->use_empty()) |
| 4500 |
TPT.eraseInstruction(SExtOpnd); |
4500 |
TPT.eraseInstruction(SExtOpnd); |
| 4501 |
|
4501 |
|
| 4502 |
// Check if the extension is still needed. |
4502 |
// Check if the extension is still needed. |
| 4503 |
Instruction *ExtInst = dyn_cast(ExtVal); |
4503 |
Instruction *ExtInst = dyn_cast(ExtVal); |
| 4504 |
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { |
4504 |
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { |
| 4505 |
if (ExtInst) { |
4505 |
if (ExtInst) { |
| 4506 |
if (Exts) |
4506 |
if (Exts) |
| 4507 |
Exts->push_back(ExtInst); |
4507 |
Exts->push_back(ExtInst); |
| 4508 |
CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; |
4508 |
CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; |
| 4509 |
} |
4509 |
} |
| 4510 |
return ExtVal; |
4510 |
return ExtVal; |
| 4511 |
} |
4511 |
} |
| 4512 |
|
4512 |
|
| 4513 |
// At this point we have: ext ty opnd to ty. |
4513 |
// At this point we have: ext ty opnd to ty. |
| 4514 |
// Reassign the uses of ExtInst to the opnd and remove ExtInst. |
4514 |
// Reassign the uses of ExtInst to the opnd and remove ExtInst. |
| 4515 |
Value *NextVal = ExtInst->getOperand(0); |
4515 |
Value *NextVal = ExtInst->getOperand(0); |
| 4516 |
TPT.eraseInstruction(ExtInst, NextVal); |
4516 |
TPT.eraseInstruction(ExtInst, NextVal); |
| 4517 |
return NextVal; |
4517 |
return NextVal; |
| 4518 |
} |
4518 |
} |
| 4519 |
|
4519 |
|
| 4520 |
Value *TypePromotionHelper::promoteOperandForOther( |
4520 |
Value *TypePromotionHelper::promoteOperandForOther( |
| 4521 |
Instruction *Ext, TypePromotionTransaction &TPT, |
4521 |
Instruction *Ext, TypePromotionTransaction &TPT, |
| 4522 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
4522 |
InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, |
| 4523 |
SmallVectorImpl *Exts, |
4523 |
SmallVectorImpl *Exts, |
| 4524 |
SmallVectorImpl *Truncs, const TargetLowering &TLI, |
4524 |
SmallVectorImpl *Truncs, const TargetLowering &TLI, |
| 4525 |
bool IsSExt) { |
4525 |
bool IsSExt) { |
| 4526 |
// By construction, the operand of Ext is an instruction. Otherwise we cannot |
4526 |
// By construction, the operand of Ext is an instruction. Otherwise we cannot |
| 4527 |
// get through it and this method should not be called. |
4527 |
// get through it and this method should not be called. |
| 4528 |
Instruction *ExtOpnd = cast(Ext->getOperand(0)); |
4528 |
Instruction *ExtOpnd = cast(Ext->getOperand(0)); |
| 4529 |
CreatedInstsCost = 0; |
4529 |
CreatedInstsCost = 0; |
| 4530 |
if (!ExtOpnd->hasOneUse()) { |
4530 |
if (!ExtOpnd->hasOneUse()) { |
| 4531 |
// ExtOpnd will be promoted. |
4531 |
// ExtOpnd will be promoted. |
| 4532 |
// All its uses, but Ext, will need to use a truncated value of the |
4532 |
// All its uses, but Ext, will need to use a truncated value of the |
| 4533 |
// promoted version. |
4533 |
// promoted version. |
| 4534 |
// Create the truncate now. |
4534 |
// Create the truncate now. |
| 4535 |
Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); |
4535 |
Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); |
| 4536 |
if (Instruction *ITrunc = dyn_cast(Trunc)) { |
4536 |
if (Instruction *ITrunc = dyn_cast(Trunc)) { |
| 4537 |
// Insert it just after the definition. |
4537 |
// Insert it just after the definition. |
| 4538 |
ITrunc->moveAfter(ExtOpnd); |
4538 |
ITrunc->moveAfter(ExtOpnd); |
| 4539 |
if (Truncs) |
4539 |
if (Truncs) |
| 4540 |
Truncs->push_back(ITrunc); |
4540 |
Truncs->push_back(ITrunc); |
| 4541 |
} |
4541 |
} |
| 4542 |
|
4542 |
|
| 4543 |
TPT.replaceAllUsesWith(ExtOpnd, Trunc); |
4543 |
TPT.replaceAllUsesWith(ExtOpnd, Trunc); |
| 4544 |
// Restore the operand of Ext (which has been replaced by the previous call |
4544 |
// Restore the operand of Ext (which has been replaced by the previous call |
| 4545 |
// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. |
4545 |
// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. |
| 4546 |
TPT.setOperand(Ext, 0, ExtOpnd); |
4546 |
TPT.setOperand(Ext, 0, ExtOpnd); |
| 4547 |
} |
4547 |
} |
| 4548 |
|
4548 |
|
| 4549 |
// Get through the Instruction: |
4549 |
// Get through the Instruction: |
| 4550 |
// 1. Update its type. |
4550 |
// 1. Update its type. |
| 4551 |
// 2. Replace the uses of Ext by Inst. |
4551 |
// 2. Replace the uses of Ext by Inst. |
| 4552 |
// 3. Extend each operand that needs to be extended. |
4552 |
// 3. Extend each operand that needs to be extended. |
| 4553 |
|
4553 |
|
| 4554 |
// Remember the original type of the instruction before promotion. |
4554 |
// Remember the original type of the instruction before promotion. |
| 4555 |
// This is useful to know that the high bits are sign extended bits. |
4555 |
// This is useful to know that the high bits are sign extended bits. |
| 4556 |
addPromotedInst(PromotedInsts, ExtOpnd, IsSExt); |
4556 |
addPromotedInst(PromotedInsts, ExtOpnd, IsSExt); |
| 4557 |
// Step #1. |
4557 |
// Step #1. |
| 4558 |
TPT.mutateType(ExtOpnd, Ext->getType()); |
4558 |
TPT.mutateType(ExtOpnd, Ext->getType()); |
| 4559 |
// Step #2. |
4559 |
// Step #2. |
| 4560 |
TPT.replaceAllUsesWith(Ext, ExtOpnd); |
4560 |
TPT.replaceAllUsesWith(Ext, ExtOpnd); |
| 4561 |
// Step #3. |
4561 |
// Step #3. |
| 4562 |
Instruction *ExtForOpnd = Ext; |
4562 |
Instruction *ExtForOpnd = Ext; |
| 4563 |
|
4563 |
|
| 4564 |
LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n"); |
4564 |
LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n"); |
| 4565 |
for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; |
4565 |
for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; |
| 4566 |
++OpIdx) { |
4566 |
++OpIdx) { |
| 4567 |
LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); |
4567 |
LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); |
| 4568 |
if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || |
4568 |
if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || |
| 4569 |
!shouldExtOperand(ExtOpnd, OpIdx)) { |
4569 |
!shouldExtOperand(ExtOpnd, OpIdx)) { |
| 4570 |
LLVM_DEBUG(dbgs() << "No need to propagate\n"); |
4570 |
LLVM_DEBUG(dbgs() << "No need to propagate\n"); |
| 4571 |
continue; |
4571 |
continue; |
| 4572 |
} |
4572 |
} |
| 4573 |
// Check if we can statically extend the operand. |
4573 |
// Check if we can statically extend the operand. |
| 4574 |
Value *Opnd = ExtOpnd->getOperand(OpIdx); |
4574 |
Value *Opnd = ExtOpnd->getOperand(OpIdx); |
| 4575 |
if (const ConstantInt *Cst = dyn_cast(Opnd)) { |
4575 |
if (const ConstantInt *Cst = dyn_cast(Opnd)) { |
| 4576 |
LLVM_DEBUG(dbgs() << "Statically extend\n"); |
4576 |
LLVM_DEBUG(dbgs() << "Statically extend\n"); |
| 4577 |
unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); |
4577 |
unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); |
| 4578 |
APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) |
4578 |
APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) |
| 4579 |
: Cst->getValue().zext(BitWidth); |
4579 |
: Cst->getValue().zext(BitWidth); |
| 4580 |
TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); |
4580 |
TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); |
| 4581 |
continue; |
4581 |
continue; |
| 4582 |
} |
4582 |
} |
| 4583 |
// UndefValue are typed, so we have to statically sign extend them. |
4583 |
// UndefValue are typed, so we have to statically sign extend them. |
| 4584 |
if (isa(Opnd)) { |
4584 |
if (isa(Opnd)) { |
| 4585 |
LLVM_DEBUG(dbgs() << "Statically extend\n"); |
4585 |
LLVM_DEBUG(dbgs() << "Statically extend\n"); |
| 4586 |
TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); |
4586 |
TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); |
| 4587 |
continue; |
4587 |
continue; |
| 4588 |
} |
4588 |
} |
| 4589 |
|
4589 |
|
| 4590 |
// Otherwise we have to explicitly sign extend the operand. |
4590 |
// Otherwise we have to explicitly sign extend the operand. |
| 4591 |
// Check if Ext was reused to extend an operand. |
4591 |
// Check if Ext was reused to extend an operand. |
| 4592 |
if (!ExtForOpnd) { |
4592 |
if (!ExtForOpnd) { |
| 4593 |
// If yes, create a new one. |
4593 |
// If yes, create a new one. |
| 4594 |
LLVM_DEBUG(dbgs() << "More operands to ext\n"); |
4594 |
LLVM_DEBUG(dbgs() << "More operands to ext\n"); |
| 4595 |
Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) |
4595 |
Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) |
| 4596 |
: TPT.createZExt(Ext, Opnd, Ext->getType()); |
4596 |
: TPT.createZExt(Ext, Opnd, Ext->getType()); |
| 4597 |
if (!isa(ValForExtOpnd)) { |
4597 |
if (!isa(ValForExtOpnd)) { |
| 4598 |
TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); |
4598 |
TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); |
| 4599 |
continue; |
4599 |
continue; |
| 4600 |
} |
4600 |
} |
| 4601 |
ExtForOpnd = cast(ValForExtOpnd); |
4601 |
ExtForOpnd = cast(ValForExtOpnd); |
| 4602 |
} |
4602 |
} |
| 4603 |
if (Exts) |
4603 |
if (Exts) |
| 4604 |
Exts->push_back(ExtForOpnd); |
4604 |
Exts->push_back(ExtForOpnd); |
| 4605 |
TPT.setOperand(ExtForOpnd, 0, Opnd); |
4605 |
TPT.setOperand(ExtForOpnd, 0, Opnd); |
| 4606 |
|
4606 |
|
| 4607 |
// Move the sign extension before the insertion point. |
4607 |
// Move the sign extension before the insertion point. |
| 4608 |
TPT.moveBefore(ExtForOpnd, ExtOpnd); |
4608 |
TPT.moveBefore(ExtForOpnd, ExtOpnd); |
| 4609 |
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); |
4609 |
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); |
| 4610 |
CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); |
4610 |
CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); |
| 4611 |
// If more sext are required, new instructions will have to be created. |
4611 |
// If more sext are required, new instructions will have to be created. |
| 4612 |
ExtForOpnd = nullptr; |
4612 |
ExtForOpnd = nullptr; |
| 4613 |
} |
4613 |
} |
| 4614 |
if (ExtForOpnd == Ext) { |
4614 |
if (ExtForOpnd == Ext) { |
| 4615 |
LLVM_DEBUG(dbgs() << "Extension is useless now\n"); |
4615 |
LLVM_DEBUG(dbgs() << "Extension is useless now\n"); |
| 4616 |
TPT.eraseInstruction(Ext); |
4616 |
TPT.eraseInstruction(Ext); |
| 4617 |
} |
4617 |
} |
| 4618 |
return ExtOpnd; |
4618 |
return ExtOpnd; |
| 4619 |
} |
4619 |
} |
| 4620 |
|
4620 |
|
| 4621 |
/// Check whether or not promoting an instruction to a wider type is profitable. |
4621 |
/// Check whether or not promoting an instruction to a wider type is profitable. |
| 4622 |
/// \p NewCost gives the cost of extension instructions created by the |
4622 |
/// \p NewCost gives the cost of extension instructions created by the |
| 4623 |
/// promotion. |
4623 |
/// promotion. |
| 4624 |
/// \p OldCost gives the cost of extension instructions before the promotion |
4624 |
/// \p OldCost gives the cost of extension instructions before the promotion |
| 4625 |
/// plus the number of instructions that have been |
4625 |
/// plus the number of instructions that have been |
| 4626 |
/// matched in the addressing mode the promotion. |
4626 |
/// matched in the addressing mode the promotion. |
| 4627 |
/// \p PromotedOperand is the value that has been promoted. |
4627 |
/// \p PromotedOperand is the value that has been promoted. |
| 4628 |
/// \return True if the promotion is profitable, false otherwise. |
4628 |
/// \return True if the promotion is profitable, false otherwise. |
| 4629 |
bool AddressingModeMatcher::isPromotionProfitable( |
4629 |
bool AddressingModeMatcher::isPromotionProfitable( |
| 4630 |
unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { |
4630 |
unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { |
| 4631 |
LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost |
4631 |
LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost |
| 4632 |
<< '\n'); |
4632 |
<< '\n'); |
| 4633 |
// The cost of the new extensions is greater than the cost of the |
4633 |
// The cost of the new extensions is greater than the cost of the |
| 4634 |
// old extension plus what we folded. |
4634 |
// old extension plus what we folded. |
| 4635 |
// This is not profitable. |
4635 |
// This is not profitable. |
| 4636 |
if (NewCost > OldCost) |
4636 |
if (NewCost > OldCost) |
| 4637 |
return false; |
4637 |
return false; |
| 4638 |
if (NewCost < OldCost) |
4638 |
if (NewCost < OldCost) |
| 4639 |
return true; |
4639 |
return true; |
| 4640 |
// The promotion is neutral but it may help folding the sign extension in |
4640 |
// The promotion is neutral but it may help folding the sign extension in |
| 4641 |
// loads for instance. |
4641 |
// loads for instance. |
| 4642 |
// Check that we did not create an illegal instruction. |
4642 |
// Check that we did not create an illegal instruction. |
| 4643 |
return isPromotedInstructionLegal(TLI, DL, PromotedOperand); |
4643 |
return isPromotedInstructionLegal(TLI, DL, PromotedOperand); |
| 4644 |
} |
4644 |
} |
| 4645 |
|
4645 |
|
| 4646 |
/// Given an instruction or constant expr, see if we can fold the operation |
4646 |
/// Given an instruction or constant expr, see if we can fold the operation |
| 4647 |
/// into the addressing mode. If so, update the addressing mode and return |
4647 |
/// into the addressing mode. If so, update the addressing mode and return |
| 4648 |
/// true, otherwise return false without modifying AddrMode. |
4648 |
/// true, otherwise return false without modifying AddrMode. |
| 4649 |
/// If \p MovedAway is not NULL, it contains the information of whether or |
4649 |
/// If \p MovedAway is not NULL, it contains the information of whether or |
| 4650 |
/// not AddrInst has to be folded into the addressing mode on success. |
4650 |
/// not AddrInst has to be folded into the addressing mode on success. |
| 4651 |
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing |
4651 |
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing |
| 4652 |
/// because it has been moved away. |
4652 |
/// because it has been moved away. |
| 4653 |
/// Thus AddrInst must not be added in the matched instructions. |
4653 |
/// Thus AddrInst must not be added in the matched instructions. |
| 4654 |
/// This state can happen when AddrInst is a sext, since it may be moved away. |
4654 |
/// This state can happen when AddrInst is a sext, since it may be moved away. |
| 4655 |
/// Therefore, AddrInst may not be valid when MovedAway is true and it must |
4655 |
/// Therefore, AddrInst may not be valid when MovedAway is true and it must |
| 4656 |
/// not be referenced anymore. |
4656 |
/// not be referenced anymore. |
| 4657 |
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, |
4657 |
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, |
| 4658 |
unsigned Depth, |
4658 |
unsigned Depth, |
| 4659 |
bool *MovedAway) { |
4659 |
bool *MovedAway) { |
| 4660 |
// Avoid exponential behavior on extremely deep expression trees. |
4660 |
// Avoid exponential behavior on extremely deep expression trees. |
| 4661 |
if (Depth >= 5) |
4661 |
if (Depth >= 5) |
| 4662 |
return false; |
4662 |
return false; |
| 4663 |
|
4663 |
|
| 4664 |
// By default, all matched instructions stay in place. |
4664 |
// By default, all matched instructions stay in place. |
| 4665 |
if (MovedAway) |
4665 |
if (MovedAway) |
| 4666 |
*MovedAway = false; |
4666 |
*MovedAway = false; |
| 4667 |
|
4667 |
|
| 4668 |
switch (Opcode) { |
4668 |
switch (Opcode) { |
| 4669 |
case Instruction::PtrToInt: |
4669 |
case Instruction::PtrToInt: |
| 4670 |
// PtrToInt is always a noop, as we know that the int type is pointer sized. |
4670 |
// PtrToInt is always a noop, as we know that the int type is pointer sized. |
| 4671 |
return matchAddr(AddrInst->getOperand(0), Depth); |
4671 |
return matchAddr(AddrInst->getOperand(0), Depth); |
| 4672 |
case Instruction::IntToPtr: { |
4672 |
case Instruction::IntToPtr: { |
| 4673 |
auto AS = AddrInst->getType()->getPointerAddressSpace(); |
4673 |
auto AS = AddrInst->getType()->getPointerAddressSpace(); |
| 4674 |
auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
4674 |
auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
| 4675 |
// This inttoptr is a no-op if the integer type is pointer sized. |
4675 |
// This inttoptr is a no-op if the integer type is pointer sized. |
| 4676 |
if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) |
4676 |
if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) |
| 4677 |
return matchAddr(AddrInst->getOperand(0), Depth); |
4677 |
return matchAddr(AddrInst->getOperand(0), Depth); |
| 4678 |
return false; |
4678 |
return false; |
| 4679 |
} |
4679 |
} |
| 4680 |
case Instruction::BitCast: |
4680 |
case Instruction::BitCast: |
| 4681 |
// BitCast is always a noop, and we can handle it as long as it is |
4681 |
// BitCast is always a noop, and we can handle it as long as it is |
| 4682 |
// int->int or pointer->pointer (we don't want int<->fp or something). |
4682 |
// int->int or pointer->pointer (we don't want int<->fp or something). |
| 4683 |
if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() && |
4683 |
if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() && |
| 4684 |
// Don't touch identity bitcasts. These were probably put here by LSR, |
4684 |
// Don't touch identity bitcasts. These were probably put here by LSR, |
| 4685 |
// and we don't want to mess around with them. Assume it knows what it |
4685 |
// and we don't want to mess around with them. Assume it knows what it |
| 4686 |
// is doing. |
4686 |
// is doing. |
| 4687 |
AddrInst->getOperand(0)->getType() != AddrInst->getType()) |
4687 |
AddrInst->getOperand(0)->getType() != AddrInst->getType()) |
| 4688 |
return matchAddr(AddrInst->getOperand(0), Depth); |
4688 |
return matchAddr(AddrInst->getOperand(0), Depth); |
| 4689 |
return false; |
4689 |
return false; |
| 4690 |
case Instruction::AddrSpaceCast: { |
4690 |
case Instruction::AddrSpaceCast: { |
| 4691 |
unsigned SrcAS = |
4691 |
unsigned SrcAS = |
| 4692 |
AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); |
4692 |
AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); |
| 4693 |
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); |
4693 |
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); |
| 4694 |
if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) |
4694 |
if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) |
| 4695 |
return matchAddr(AddrInst->getOperand(0), Depth); |
4695 |
return matchAddr(AddrInst->getOperand(0), Depth); |
| 4696 |
return false; |
4696 |
return false; |
| 4697 |
} |
4697 |
} |
| 4698 |
case Instruction::Add: { |
4698 |
case Instruction::Add: { |
| 4699 |
// Check to see if we can merge in one operand, then the other. If so, we |
4699 |
// Check to see if we can merge in one operand, then the other. If so, we |
| 4700 |
// win. |
4700 |
// win. |
| 4701 |
ExtAddrMode BackupAddrMode = AddrMode; |
4701 |
ExtAddrMode BackupAddrMode = AddrMode; |
| 4702 |
unsigned OldSize = AddrModeInsts.size(); |
4702 |
unsigned OldSize = AddrModeInsts.size(); |
| 4703 |
// Start a transaction at this point. |
4703 |
// Start a transaction at this point. |
| 4704 |
// The LHS may match but not the RHS. |
4704 |
// The LHS may match but not the RHS. |
| 4705 |
// Therefore, we need a higher level restoration point to undo partially |
4705 |
// Therefore, we need a higher level restoration point to undo partially |
| 4706 |
// matched operation. |
4706 |
// matched operation. |
| 4707 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
4707 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 4708 |
TPT.getRestorationPoint(); |
4708 |
TPT.getRestorationPoint(); |
| 4709 |
|
4709 |
|
| 4710 |
// Try to match an integer constant second to increase its chance of ending |
4710 |
// Try to match an integer constant second to increase its chance of ending |
| 4711 |
// up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`. |
4711 |
// up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`. |
| 4712 |
int First = 0, Second = 1; |
4712 |
int First = 0, Second = 1; |
| 4713 |
if (isa(AddrInst->getOperand(First)) |
4713 |
if (isa(AddrInst->getOperand(First)) |
| 4714 |
&& !isa(AddrInst->getOperand(Second))) |
4714 |
&& !isa(AddrInst->getOperand(Second))) |
| 4715 |
std::swap(First, Second); |
4715 |
std::swap(First, Second); |
| 4716 |
AddrMode.InBounds = false; |
4716 |
AddrMode.InBounds = false; |
| 4717 |
if (matchAddr(AddrInst->getOperand(First), Depth + 1) && |
4717 |
if (matchAddr(AddrInst->getOperand(First), Depth + 1) && |
| 4718 |
matchAddr(AddrInst->getOperand(Second), Depth + 1)) |
4718 |
matchAddr(AddrInst->getOperand(Second), Depth + 1)) |
| 4719 |
return true; |
4719 |
return true; |
| 4720 |
|
4720 |
|
| 4721 |
// Restore the old addr mode info. |
4721 |
// Restore the old addr mode info. |
| 4722 |
AddrMode = BackupAddrMode; |
4722 |
AddrMode = BackupAddrMode; |
| 4723 |
AddrModeInsts.resize(OldSize); |
4723 |
AddrModeInsts.resize(OldSize); |
| 4724 |
TPT.rollback(LastKnownGood); |
4724 |
TPT.rollback(LastKnownGood); |
| 4725 |
|
4725 |
|
| 4726 |
// Otherwise this was over-aggressive. Try merging operands in the opposite |
4726 |
// Otherwise this was over-aggressive. Try merging operands in the opposite |
| 4727 |
// order. |
4727 |
// order. |
| 4728 |
if (matchAddr(AddrInst->getOperand(Second), Depth + 1) && |
4728 |
if (matchAddr(AddrInst->getOperand(Second), Depth + 1) && |
| 4729 |
matchAddr(AddrInst->getOperand(First), Depth + 1)) |
4729 |
matchAddr(AddrInst->getOperand(First), Depth + 1)) |
| 4730 |
return true; |
4730 |
return true; |
| 4731 |
|
4731 |
|
| 4732 |
// Otherwise we definitely can't merge the ADD in. |
4732 |
// Otherwise we definitely can't merge the ADD in. |
| 4733 |
AddrMode = BackupAddrMode; |
4733 |
AddrMode = BackupAddrMode; |
| 4734 |
AddrModeInsts.resize(OldSize); |
4734 |
AddrModeInsts.resize(OldSize); |
| 4735 |
TPT.rollback(LastKnownGood); |
4735 |
TPT.rollback(LastKnownGood); |
| 4736 |
break; |
4736 |
break; |
| 4737 |
} |
4737 |
} |
| 4738 |
// case Instruction::Or: |
4738 |
// case Instruction::Or: |
| 4739 |
// TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. |
4739 |
// TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. |
| 4740 |
// break; |
4740 |
// break; |
| 4741 |
case Instruction::Mul: |
4741 |
case Instruction::Mul: |
| 4742 |
case Instruction::Shl: { |
4742 |
case Instruction::Shl: { |
| 4743 |
// Can only handle X*C and X << C. |
4743 |
// Can only handle X*C and X << C. |
| 4744 |
AddrMode.InBounds = false; |
4744 |
AddrMode.InBounds = false; |
| 4745 |
ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); |
4745 |
ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); |
| 4746 |
if (!RHS || RHS->getBitWidth() > 64) |
4746 |
if (!RHS || RHS->getBitWidth() > 64) |
| 4747 |
return false; |
4747 |
return false; |
| 4748 |
int64_t Scale = Opcode == Instruction::Shl |
4748 |
int64_t Scale = Opcode == Instruction::Shl |
| 4749 |
? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1) |
4749 |
? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1) |
| 4750 |
: RHS->getSExtValue(); |
4750 |
: RHS->getSExtValue(); |
| 4751 |
|
4751 |
|
| 4752 |
return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); |
4752 |
return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); |
| 4753 |
} |
4753 |
} |
| 4754 |
case Instruction::GetElementPtr: { |
4754 |
case Instruction::GetElementPtr: { |
| 4755 |
// Scan the GEP. We check it if it contains constant offsets and at most |
4755 |
// Scan the GEP. We check it if it contains constant offsets and at most |
| 4756 |
// one variable offset. |
4756 |
// one variable offset. |
| 4757 |
int VariableOperand = -1; |
4757 |
int VariableOperand = -1; |
| 4758 |
unsigned VariableScale = 0; |
4758 |
unsigned VariableScale = 0; |
| 4759 |
|
4759 |
|
| 4760 |
int64_t ConstantOffset = 0; |
4760 |
int64_t ConstantOffset = 0; |
| 4761 |
gep_type_iterator GTI = gep_type_begin(AddrInst); |
4761 |
gep_type_iterator GTI = gep_type_begin(AddrInst); |
| 4762 |
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { |
4762 |
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { |
| 4763 |
if (StructType *STy = GTI.getStructTypeOrNull()) { |
4763 |
if (StructType *STy = GTI.getStructTypeOrNull()) { |
| 4764 |
const StructLayout *SL = DL.getStructLayout(STy); |
4764 |
const StructLayout *SL = DL.getStructLayout(STy); |
| 4765 |
unsigned Idx = |
4765 |
unsigned Idx = |
| 4766 |
cast(AddrInst->getOperand(i))->getZExtValue(); |
4766 |
cast(AddrInst->getOperand(i))->getZExtValue(); |
| 4767 |
ConstantOffset += SL->getElementOffset(Idx); |
4767 |
ConstantOffset += SL->getElementOffset(Idx); |
| 4768 |
} else { |
4768 |
} else { |
| 4769 |
TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType()); |
4769 |
TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType()); |
| 4770 |
if (TS.isNonZero()) { |
4770 |
if (TS.isNonZero()) { |
| 4771 |
// The optimisations below currently only work for fixed offsets. |
4771 |
// The optimisations below currently only work for fixed offsets. |
| 4772 |
if (TS.isScalable()) |
4772 |
if (TS.isScalable()) |
| 4773 |
return false; |
4773 |
return false; |
| 4774 |
int64_t TypeSize = TS.getFixedValue(); |
4774 |
int64_t TypeSize = TS.getFixedValue(); |
| 4775 |
if (ConstantInt *CI = |
4775 |
if (ConstantInt *CI = |
| 4776 |
dyn_cast(AddrInst->getOperand(i))) { |
4776 |
dyn_cast(AddrInst->getOperand(i))) { |
| 4777 |
const APInt &CVal = CI->getValue(); |
4777 |
const APInt &CVal = CI->getValue(); |
| 4778 |
if (CVal.getSignificantBits() <= 64) { |
4778 |
if (CVal.getSignificantBits() <= 64) { |
| 4779 |
ConstantOffset += CVal.getSExtValue() * TypeSize; |
4779 |
ConstantOffset += CVal.getSExtValue() * TypeSize; |
| 4780 |
continue; |
4780 |
continue; |
| 4781 |
} |
4781 |
} |
| 4782 |
} |
4782 |
} |
| 4783 |
// We only allow one variable index at the moment. |
4783 |
// We only allow one variable index at the moment. |
| 4784 |
if (VariableOperand != -1) |
4784 |
if (VariableOperand != -1) |
| 4785 |
return false; |
4785 |
return false; |
| 4786 |
|
4786 |
|
| 4787 |
// Remember the variable index. |
4787 |
// Remember the variable index. |
| 4788 |
VariableOperand = i; |
4788 |
VariableOperand = i; |
| 4789 |
VariableScale = TypeSize; |
4789 |
VariableScale = TypeSize; |
| 4790 |
} |
4790 |
} |
| 4791 |
} |
4791 |
} |
| 4792 |
} |
4792 |
} |
| 4793 |
|
4793 |
|
| 4794 |
// A common case is for the GEP to only do a constant offset. In this case, |
4794 |
// A common case is for the GEP to only do a constant offset. In this case, |
| 4795 |
// just add it to the disp field and check validity. |
4795 |
// just add it to the disp field and check validity. |
| 4796 |
if (VariableOperand == -1) { |
4796 |
if (VariableOperand == -1) { |
| 4797 |
AddrMode.BaseOffs += ConstantOffset; |
4797 |
AddrMode.BaseOffs += ConstantOffset; |
| 4798 |
if (matchAddr(AddrInst->getOperand(0), Depth + 1)) { |
4798 |
if (matchAddr(AddrInst->getOperand(0), Depth + 1)) { |
| 4799 |
if (!cast(AddrInst)->isInBounds()) |
4799 |
if (!cast(AddrInst)->isInBounds()) |
| 4800 |
AddrMode.InBounds = false; |
4800 |
AddrMode.InBounds = false; |
| 4801 |
return true; |
4801 |
return true; |
| 4802 |
} |
4802 |
} |
| 4803 |
AddrMode.BaseOffs -= ConstantOffset; |
4803 |
AddrMode.BaseOffs -= ConstantOffset; |
| 4804 |
|
4804 |
|
| 4805 |
if (EnableGEPOffsetSplit && isa(AddrInst) && |
4805 |
if (EnableGEPOffsetSplit && isa(AddrInst) && |
| 4806 |
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 && |
4806 |
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 && |
| 4807 |
ConstantOffset > 0) { |
4807 |
ConstantOffset > 0) { |
| 4808 |
// Record GEPs with non-zero offsets as candidates for splitting in |
4808 |
// Record GEPs with non-zero offsets as candidates for splitting in |
| 4809 |
// the event that the offset cannot fit into the r+i addressing mode. |
4809 |
// the event that the offset cannot fit into the r+i addressing mode. |
| 4810 |
// Simple and common case that only one GEP is used in calculating the |
4810 |
// Simple and common case that only one GEP is used in calculating the |
| 4811 |
// address for the memory access. |
4811 |
// address for the memory access. |
| 4812 |
Value *Base = AddrInst->getOperand(0); |
4812 |
Value *Base = AddrInst->getOperand(0); |
| 4813 |
auto *BaseI = dyn_cast(Base); |
4813 |
auto *BaseI = dyn_cast(Base); |
| 4814 |
auto *GEP = cast(AddrInst); |
4814 |
auto *GEP = cast(AddrInst); |
| 4815 |
if (isa(Base) || isa(Base) || |
4815 |
if (isa(Base) || isa(Base) || |
| 4816 |
(BaseI && !isa(BaseI) && |
4816 |
(BaseI && !isa(BaseI) && |
| 4817 |
!isa(BaseI))) { |
4817 |
!isa(BaseI))) { |
| 4818 |
// Make sure the parent block allows inserting non-PHI instructions |
4818 |
// Make sure the parent block allows inserting non-PHI instructions |
| 4819 |
// before the terminator. |
4819 |
// before the terminator. |
| 4820 |
BasicBlock *Parent = BaseI ? BaseI->getParent() |
4820 |
BasicBlock *Parent = BaseI ? BaseI->getParent() |
| 4821 |
: &GEP->getFunction()->getEntryBlock(); |
4821 |
: &GEP->getFunction()->getEntryBlock(); |
| 4822 |
if (!Parent->getTerminator()->isEHPad()) |
4822 |
if (!Parent->getTerminator()->isEHPad()) |
| 4823 |
LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); |
4823 |
LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); |
| 4824 |
} |
4824 |
} |
| 4825 |
} |
4825 |
} |
| 4826 |
|
4826 |
|
| 4827 |
return false; |
4827 |
return false; |
| 4828 |
} |
4828 |
} |
| 4829 |
|
4829 |
|
| 4830 |
// Save the valid addressing mode in case we can't match. |
4830 |
// Save the valid addressing mode in case we can't match. |
| 4831 |
ExtAddrMode BackupAddrMode = AddrMode; |
4831 |
ExtAddrMode BackupAddrMode = AddrMode; |
| 4832 |
unsigned OldSize = AddrModeInsts.size(); |
4832 |
unsigned OldSize = AddrModeInsts.size(); |
| 4833 |
|
4833 |
|
| 4834 |
// See if the scale and offset amount is valid for this target. |
4834 |
// See if the scale and offset amount is valid for this target. |
| 4835 |
AddrMode.BaseOffs += ConstantOffset; |
4835 |
AddrMode.BaseOffs += ConstantOffset; |
| 4836 |
if (!cast(AddrInst)->isInBounds()) |
4836 |
if (!cast(AddrInst)->isInBounds()) |
| 4837 |
AddrMode.InBounds = false; |
4837 |
AddrMode.InBounds = false; |
| 4838 |
|
4838 |
|
| 4839 |
// Match the base operand of the GEP. |
4839 |
// Match the base operand of the GEP. |
| 4840 |
if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) { |
4840 |
if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) { |
| 4841 |
// If it couldn't be matched, just stuff the value in a register. |
4841 |
// If it couldn't be matched, just stuff the value in a register. |
| 4842 |
if (AddrMode.HasBaseReg) { |
4842 |
if (AddrMode.HasBaseReg) { |
| 4843 |
AddrMode = BackupAddrMode; |
4843 |
AddrMode = BackupAddrMode; |
| 4844 |
AddrModeInsts.resize(OldSize); |
4844 |
AddrModeInsts.resize(OldSize); |
| 4845 |
return false; |
4845 |
return false; |
| 4846 |
} |
4846 |
} |
| 4847 |
AddrMode.HasBaseReg = true; |
4847 |
AddrMode.HasBaseReg = true; |
| 4848 |
AddrMode.BaseReg = AddrInst->getOperand(0); |
4848 |
AddrMode.BaseReg = AddrInst->getOperand(0); |
| 4849 |
} |
4849 |
} |
| 4850 |
|
4850 |
|
| 4851 |
// Match the remaining variable portion of the GEP. |
4851 |
// Match the remaining variable portion of the GEP. |
| 4852 |
if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, |
4852 |
if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, |
| 4853 |
Depth)) { |
4853 |
Depth)) { |
| 4854 |
// If it couldn't be matched, try stuffing the base into a register |
4854 |
// If it couldn't be matched, try stuffing the base into a register |
| 4855 |
// instead of matching it, and retrying the match of the scale. |
4855 |
// instead of matching it, and retrying the match of the scale. |
| 4856 |
AddrMode = BackupAddrMode; |
4856 |
AddrMode = BackupAddrMode; |
| 4857 |
AddrModeInsts.resize(OldSize); |
4857 |
AddrModeInsts.resize(OldSize); |
| 4858 |
if (AddrMode.HasBaseReg) |
4858 |
if (AddrMode.HasBaseReg) |
| 4859 |
return false; |
4859 |
return false; |
| 4860 |
AddrMode.HasBaseReg = true; |
4860 |
AddrMode.HasBaseReg = true; |
| 4861 |
AddrMode.BaseReg = AddrInst->getOperand(0); |
4861 |
AddrMode.BaseReg = AddrInst->getOperand(0); |
| 4862 |
AddrMode.BaseOffs += ConstantOffset; |
4862 |
AddrMode.BaseOffs += ConstantOffset; |
| 4863 |
if (!matchScaledValue(AddrInst->getOperand(VariableOperand), |
4863 |
if (!matchScaledValue(AddrInst->getOperand(VariableOperand), |
| 4864 |
VariableScale, Depth)) { |
4864 |
VariableScale, Depth)) { |
| 4865 |
// If even that didn't work, bail. |
4865 |
// If even that didn't work, bail. |
| 4866 |
AddrMode = BackupAddrMode; |
4866 |
AddrMode = BackupAddrMode; |
| 4867 |
AddrModeInsts.resize(OldSize); |
4867 |
AddrModeInsts.resize(OldSize); |
| 4868 |
return false; |
4868 |
return false; |
| 4869 |
} |
4869 |
} |
| 4870 |
} |
4870 |
} |
| 4871 |
|
4871 |
|
| 4872 |
return true; |
4872 |
return true; |
| 4873 |
} |
4873 |
} |
| 4874 |
case Instruction::SExt: |
4874 |
case Instruction::SExt: |
| 4875 |
case Instruction::ZExt: { |
4875 |
case Instruction::ZExt: { |
| 4876 |
Instruction *Ext = dyn_cast(AddrInst); |
4876 |
Instruction *Ext = dyn_cast(AddrInst); |
| 4877 |
if (!Ext) |
4877 |
if (!Ext) |
| 4878 |
return false; |
4878 |
return false; |
| 4879 |
|
4879 |
|
| 4880 |
// Try to move this ext out of the way of the addressing mode. |
4880 |
// Try to move this ext out of the way of the addressing mode. |
| 4881 |
// Ask for a method for doing so. |
4881 |
// Ask for a method for doing so. |
| 4882 |
TypePromotionHelper::Action TPH = |
4882 |
TypePromotionHelper::Action TPH = |
| 4883 |
TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts); |
4883 |
TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts); |
| 4884 |
if (!TPH) |
4884 |
if (!TPH) |
| 4885 |
return false; |
4885 |
return false; |
| 4886 |
|
4886 |
|
| 4887 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
4887 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 4888 |
TPT.getRestorationPoint(); |
4888 |
TPT.getRestorationPoint(); |
| 4889 |
unsigned CreatedInstsCost = 0; |
4889 |
unsigned CreatedInstsCost = 0; |
| 4890 |
unsigned ExtCost = !TLI.isExtFree(Ext); |
4890 |
unsigned ExtCost = !TLI.isExtFree(Ext); |
| 4891 |
Value *PromotedOperand = |
4891 |
Value *PromotedOperand = |
| 4892 |
TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); |
4892 |
TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); |
| 4893 |
// SExt has been moved away. |
4893 |
// SExt has been moved away. |
| 4894 |
// Thus either it will be rematched later in the recursive calls or it is |
4894 |
// Thus either it will be rematched later in the recursive calls or it is |
| 4895 |
// gone. Anyway, we must not fold it into the addressing mode at this point. |
4895 |
// gone. Anyway, we must not fold it into the addressing mode at this point. |
| 4896 |
// E.g., |
4896 |
// E.g., |
| 4897 |
// op = add opnd, 1 |
4897 |
// op = add opnd, 1 |
| 4898 |
// idx = ext op |
4898 |
// idx = ext op |
| 4899 |
// addr = gep base, idx |
4899 |
// addr = gep base, idx |
| 4900 |
// is now: |
4900 |
// is now: |
| 4901 |
// promotedOpnd = ext opnd <- no match here |
4901 |
// promotedOpnd = ext opnd <- no match here |
| 4902 |
// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) |
4902 |
// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) |
| 4903 |
// addr = gep base, op <- match |
4903 |
// addr = gep base, op <- match |
| 4904 |
if (MovedAway) |
4904 |
if (MovedAway) |
| 4905 |
*MovedAway = true; |
4905 |
*MovedAway = true; |
| 4906 |
|
4906 |
|
| 4907 |
assert(PromotedOperand && |
4907 |
assert(PromotedOperand && |
| 4908 |
"TypePromotionHelper should have filtered out those cases"); |
4908 |
"TypePromotionHelper should have filtered out those cases"); |
| 4909 |
|
4909 |
|
| 4910 |
ExtAddrMode BackupAddrMode = AddrMode; |
4910 |
ExtAddrMode BackupAddrMode = AddrMode; |
| 4911 |
unsigned OldSize = AddrModeInsts.size(); |
4911 |
unsigned OldSize = AddrModeInsts.size(); |
| 4912 |
|
4912 |
|
| 4913 |
if (!matchAddr(PromotedOperand, Depth) || |
4913 |
if (!matchAddr(PromotedOperand, Depth) || |
| 4914 |
// The total of the new cost is equal to the cost of the created |
4914 |
// The total of the new cost is equal to the cost of the created |
| 4915 |
// instructions. |
4915 |
// instructions. |
| 4916 |
// The total of the old cost is equal to the cost of the extension plus |
4916 |
// The total of the old cost is equal to the cost of the extension plus |
| 4917 |
// what we have saved in the addressing mode. |
4917 |
// what we have saved in the addressing mode. |
| 4918 |
!isPromotionProfitable(CreatedInstsCost, |
4918 |
!isPromotionProfitable(CreatedInstsCost, |
| 4919 |
ExtCost + (AddrModeInsts.size() - OldSize), |
4919 |
ExtCost + (AddrModeInsts.size() - OldSize), |
| 4920 |
PromotedOperand)) { |
4920 |
PromotedOperand)) { |
| 4921 |
AddrMode = BackupAddrMode; |
4921 |
AddrMode = BackupAddrMode; |
| 4922 |
AddrModeInsts.resize(OldSize); |
4922 |
AddrModeInsts.resize(OldSize); |
| 4923 |
LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); |
4923 |
LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); |
| 4924 |
TPT.rollback(LastKnownGood); |
4924 |
TPT.rollback(LastKnownGood); |
| 4925 |
return false; |
4925 |
return false; |
| 4926 |
} |
4926 |
} |
| 4927 |
return true; |
4927 |
return true; |
| 4928 |
} |
4928 |
} |
| 4929 |
} |
4929 |
} |
| 4930 |
return false; |
4930 |
return false; |
| 4931 |
} |
4931 |
} |
| 4932 |
|
4932 |
|
| 4933 |
/// If we can, try to add the value of 'Addr' into the current addressing mode. |
4933 |
/// If we can, try to add the value of 'Addr' into the current addressing mode. |
| 4934 |
/// If Addr can't be added to AddrMode this returns false and leaves AddrMode |
4934 |
/// If Addr can't be added to AddrMode this returns false and leaves AddrMode |
| 4935 |
/// unmodified. This assumes that Addr is either a pointer type or intptr_t |
4935 |
/// unmodified. This assumes that Addr is either a pointer type or intptr_t |
| 4936 |
/// for the target. |
4936 |
/// for the target. |
| 4937 |
/// |
4937 |
/// |
| 4938 |
bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { |
4938 |
bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { |
| 4939 |
// Start a transaction at this point that we will rollback if the matching |
4939 |
// Start a transaction at this point that we will rollback if the matching |
| 4940 |
// fails. |
4940 |
// fails. |
| 4941 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
4941 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 4942 |
TPT.getRestorationPoint(); |
4942 |
TPT.getRestorationPoint(); |
| 4943 |
if (ConstantInt *CI = dyn_cast(Addr)) { |
4943 |
if (ConstantInt *CI = dyn_cast(Addr)) { |
| 4944 |
if (CI->getValue().isSignedIntN(64)) { |
4944 |
if (CI->getValue().isSignedIntN(64)) { |
| 4945 |
// Fold in immediates if legal for the target. |
4945 |
// Fold in immediates if legal for the target. |
| 4946 |
AddrMode.BaseOffs += CI->getSExtValue(); |
4946 |
AddrMode.BaseOffs += CI->getSExtValue(); |
| 4947 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
4947 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
| 4948 |
return true; |
4948 |
return true; |
| 4949 |
AddrMode.BaseOffs -= CI->getSExtValue(); |
4949 |
AddrMode.BaseOffs -= CI->getSExtValue(); |
| 4950 |
} |
4950 |
} |
| 4951 |
} else if (GlobalValue *GV = dyn_cast(Addr)) { |
4951 |
} else if (GlobalValue *GV = dyn_cast(Addr)) { |
| 4952 |
// If this is a global variable, try to fold it into the addressing mode. |
4952 |
// If this is a global variable, try to fold it into the addressing mode. |
| 4953 |
if (!AddrMode.BaseGV) { |
4953 |
if (!AddrMode.BaseGV) { |
| 4954 |
AddrMode.BaseGV = GV; |
4954 |
AddrMode.BaseGV = GV; |
| 4955 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
4955 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
| 4956 |
return true; |
4956 |
return true; |
| 4957 |
AddrMode.BaseGV = nullptr; |
4957 |
AddrMode.BaseGV = nullptr; |
| 4958 |
} |
4958 |
} |
| 4959 |
} else if (Instruction *I = dyn_cast(Addr)) { |
4959 |
} else if (Instruction *I = dyn_cast(Addr)) { |
| 4960 |
ExtAddrMode BackupAddrMode = AddrMode; |
4960 |
ExtAddrMode BackupAddrMode = AddrMode; |
| 4961 |
unsigned OldSize = AddrModeInsts.size(); |
4961 |
unsigned OldSize = AddrModeInsts.size(); |
| 4962 |
|
4962 |
|
| 4963 |
// Check to see if it is possible to fold this operation. |
4963 |
// Check to see if it is possible to fold this operation. |
| 4964 |
bool MovedAway = false; |
4964 |
bool MovedAway = false; |
| 4965 |
if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { |
4965 |
if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { |
| 4966 |
// This instruction may have been moved away. If so, there is nothing |
4966 |
// This instruction may have been moved away. If so, there is nothing |
| 4967 |
// to check here. |
4967 |
// to check here. |
| 4968 |
if (MovedAway) |
4968 |
if (MovedAway) |
| 4969 |
return true; |
4969 |
return true; |
| 4970 |
// Okay, it's possible to fold this. Check to see if it is actually |
4970 |
// Okay, it's possible to fold this. Check to see if it is actually |
| 4971 |
// *profitable* to do so. We use a simple cost model to avoid increasing |
4971 |
// *profitable* to do so. We use a simple cost model to avoid increasing |
| 4972 |
// register pressure too much. |
4972 |
// register pressure too much. |
| 4973 |
if (I->hasOneUse() || |
4973 |
if (I->hasOneUse() || |
| 4974 |
isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { |
4974 |
isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { |
| 4975 |
AddrModeInsts.push_back(I); |
4975 |
AddrModeInsts.push_back(I); |
| 4976 |
return true; |
4976 |
return true; |
| 4977 |
} |
4977 |
} |
| 4978 |
|
4978 |
|
| 4979 |
// It isn't profitable to do this, roll back. |
4979 |
// It isn't profitable to do this, roll back. |
| 4980 |
AddrMode = BackupAddrMode; |
4980 |
AddrMode = BackupAddrMode; |
| 4981 |
AddrModeInsts.resize(OldSize); |
4981 |
AddrModeInsts.resize(OldSize); |
| 4982 |
TPT.rollback(LastKnownGood); |
4982 |
TPT.rollback(LastKnownGood); |
| 4983 |
} |
4983 |
} |
| 4984 |
} else if (ConstantExpr *CE = dyn_cast(Addr)) { |
4984 |
} else if (ConstantExpr *CE = dyn_cast(Addr)) { |
| 4985 |
if (matchOperationAddr(CE, CE->getOpcode(), Depth)) |
4985 |
if (matchOperationAddr(CE, CE->getOpcode(), Depth)) |
| 4986 |
return true; |
4986 |
return true; |
| 4987 |
TPT.rollback(LastKnownGood); |
4987 |
TPT.rollback(LastKnownGood); |
| 4988 |
} else if (isa(Addr)) { |
4988 |
} else if (isa(Addr)) { |
| 4989 |
// Null pointer gets folded without affecting the addressing mode. |
4989 |
// Null pointer gets folded without affecting the addressing mode. |
| 4990 |
return true; |
4990 |
return true; |
| 4991 |
} |
4991 |
} |
| 4992 |
|
4992 |
|
| 4993 |
// Worse case, the target should support [reg] addressing modes. :) |
4993 |
// Worse case, the target should support [reg] addressing modes. :) |
| 4994 |
if (!AddrMode.HasBaseReg) { |
4994 |
if (!AddrMode.HasBaseReg) { |
| 4995 |
AddrMode.HasBaseReg = true; |
4995 |
AddrMode.HasBaseReg = true; |
| 4996 |
AddrMode.BaseReg = Addr; |
4996 |
AddrMode.BaseReg = Addr; |
| 4997 |
// Still check for legality in case the target supports [imm] but not [i+r]. |
4997 |
// Still check for legality in case the target supports [imm] but not [i+r]. |
| 4998 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
4998 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
| 4999 |
return true; |
4999 |
return true; |
| 5000 |
AddrMode.HasBaseReg = false; |
5000 |
AddrMode.HasBaseReg = false; |
| 5001 |
AddrMode.BaseReg = nullptr; |
5001 |
AddrMode.BaseReg = nullptr; |
| 5002 |
} |
5002 |
} |
| 5003 |
|
5003 |
|
| 5004 |
// If the base register is already taken, see if we can do [r+r]. |
5004 |
// If the base register is already taken, see if we can do [r+r]. |
| 5005 |
if (AddrMode.Scale == 0) { |
5005 |
if (AddrMode.Scale == 0) { |
| 5006 |
AddrMode.Scale = 1; |
5006 |
AddrMode.Scale = 1; |
| 5007 |
AddrMode.ScaledReg = Addr; |
5007 |
AddrMode.ScaledReg = Addr; |
| 5008 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
5008 |
if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) |
| 5009 |
return true; |
5009 |
return true; |
| 5010 |
AddrMode.Scale = 0; |
5010 |
AddrMode.Scale = 0; |
| 5011 |
AddrMode.ScaledReg = nullptr; |
5011 |
AddrMode.ScaledReg = nullptr; |
| 5012 |
} |
5012 |
} |
| 5013 |
// Couldn't match. |
5013 |
// Couldn't match. |
| 5014 |
TPT.rollback(LastKnownGood); |
5014 |
TPT.rollback(LastKnownGood); |
| 5015 |
return false; |
5015 |
return false; |
| 5016 |
} |
5016 |
} |
| 5017 |
|
5017 |
|
| 5018 |
/// Check to see if all uses of OpVal by the specified inline asm call are due |
5018 |
/// Check to see if all uses of OpVal by the specified inline asm call are due |
| 5019 |
/// to memory operands. If so, return true, otherwise return false. |
5019 |
/// to memory operands. If so, return true, otherwise return false. |
| 5020 |
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, |
5020 |
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, |
| 5021 |
const TargetLowering &TLI, |
5021 |
const TargetLowering &TLI, |
| 5022 |
const TargetRegisterInfo &TRI) { |
5022 |
const TargetRegisterInfo &TRI) { |
| 5023 |
const Function *F = CI->getFunction(); |
5023 |
const Function *F = CI->getFunction(); |
| 5024 |
TargetLowering::AsmOperandInfoVector TargetConstraints = |
5024 |
TargetLowering::AsmOperandInfoVector TargetConstraints = |
| 5025 |
TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI); |
5025 |
TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI); |
| 5026 |
|
5026 |
|
| 5027 |
for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { |
5027 |
for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { |
| 5028 |
// Compute the constraint code and ConstraintType to use. |
5028 |
// Compute the constraint code and ConstraintType to use. |
| 5029 |
TLI.ComputeConstraintToUse(OpInfo, SDValue()); |
5029 |
TLI.ComputeConstraintToUse(OpInfo, SDValue()); |
| 5030 |
|
5030 |
|
| 5031 |
// If this asm operand is our Value*, and if it isn't an indirect memory |
5031 |
// If this asm operand is our Value*, and if it isn't an indirect memory |
| 5032 |
// operand, we can't fold it! TODO: Also handle C_Address? |
5032 |
// operand, we can't fold it! TODO: Also handle C_Address? |
| 5033 |
if (OpInfo.CallOperandVal == OpVal && |
5033 |
if (OpInfo.CallOperandVal == OpVal && |
| 5034 |
(OpInfo.ConstraintType != TargetLowering::C_Memory || |
5034 |
(OpInfo.ConstraintType != TargetLowering::C_Memory || |
| 5035 |
!OpInfo.isIndirect)) |
5035 |
!OpInfo.isIndirect)) |
| 5036 |
return false; |
5036 |
return false; |
| 5037 |
} |
5037 |
} |
| 5038 |
|
5038 |
|
| 5039 |
return true; |
5039 |
return true; |
| 5040 |
} |
5040 |
} |
| 5041 |
|
5041 |
|
| 5042 |
/// Recursively walk all the uses of I until we find a memory use. |
5042 |
/// Recursively walk all the uses of I until we find a memory use. |
| 5043 |
/// If we find an obviously non-foldable instruction, return true. |
5043 |
/// If we find an obviously non-foldable instruction, return true. |
| 5044 |
/// Add accessed addresses and types to MemoryUses. |
5044 |
/// Add accessed addresses and types to MemoryUses. |
| 5045 |
static bool FindAllMemoryUses( |
5045 |
static bool FindAllMemoryUses( |
| 5046 |
Instruction *I, SmallVectorImpl> &MemoryUses, |
5046 |
Instruction *I, SmallVectorImpl> &MemoryUses, |
| 5047 |
SmallPtrSetImpl &ConsideredInsts, const TargetLowering &TLI, |
5047 |
SmallPtrSetImpl &ConsideredInsts, const TargetLowering &TLI, |
| 5048 |
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, |
5048 |
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, |
| 5049 |
BlockFrequencyInfo *BFI, unsigned &SeenInsts) { |
5049 |
BlockFrequencyInfo *BFI, unsigned &SeenInsts) { |
| 5050 |
// If we already considered this instruction, we're done. |
5050 |
// If we already considered this instruction, we're done. |
| 5051 |
if (!ConsideredInsts.insert(I).second) |
5051 |
if (!ConsideredInsts.insert(I).second) |
| 5052 |
return false; |
5052 |
return false; |
| 5053 |
|
5053 |
|
| 5054 |
// If this is an obviously unfoldable instruction, bail out. |
5054 |
// If this is an obviously unfoldable instruction, bail out. |
| 5055 |
if (!MightBeFoldableInst(I)) |
5055 |
if (!MightBeFoldableInst(I)) |
| 5056 |
return true; |
5056 |
return true; |
| 5057 |
|
5057 |
|
| 5058 |
// Loop over all the uses, recursively processing them. |
5058 |
// Loop over all the uses, recursively processing them. |
| 5059 |
for (Use &U : I->uses()) { |
5059 |
for (Use &U : I->uses()) { |
| 5060 |
// Conservatively return true if we're seeing a large number or a deep chain |
5060 |
// Conservatively return true if we're seeing a large number or a deep chain |
| 5061 |
// of users. This avoids excessive compilation times in pathological cases. |
5061 |
// of users. This avoids excessive compilation times in pathological cases. |
| 5062 |
if (SeenInsts++ >= MaxAddressUsersToScan) |
5062 |
if (SeenInsts++ >= MaxAddressUsersToScan) |
| 5063 |
return true; |
5063 |
return true; |
| 5064 |
|
5064 |
|
| 5065 |
Instruction *UserI = cast(U.getUser()); |
5065 |
Instruction *UserI = cast(U.getUser()); |
| 5066 |
if (LoadInst *LI = dyn_cast(UserI)) { |
5066 |
if (LoadInst *LI = dyn_cast(UserI)) { |
| 5067 |
MemoryUses.push_back({&U, LI->getType()}); |
5067 |
MemoryUses.push_back({&U, LI->getType()}); |
| 5068 |
continue; |
5068 |
continue; |
| 5069 |
} |
5069 |
} |
| 5070 |
|
5070 |
|
| 5071 |
if (StoreInst *SI = dyn_cast(UserI)) { |
5071 |
if (StoreInst *SI = dyn_cast(UserI)) { |
| 5072 |
if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) |
5072 |
if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) |
| 5073 |
return true; // Storing addr, not into addr. |
5073 |
return true; // Storing addr, not into addr. |
| 5074 |
MemoryUses.push_back({&U, SI->getValueOperand()->getType()}); |
5074 |
MemoryUses.push_back({&U, SI->getValueOperand()->getType()}); |
| 5075 |
continue; |
5075 |
continue; |
| 5076 |
} |
5076 |
} |
| 5077 |
|
5077 |
|
| 5078 |
if (AtomicRMWInst *RMW = dyn_cast(UserI)) { |
5078 |
if (AtomicRMWInst *RMW = dyn_cast(UserI)) { |
| 5079 |
if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex()) |
5079 |
if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex()) |
| 5080 |
return true; // Storing addr, not into addr. |
5080 |
return true; // Storing addr, not into addr. |
| 5081 |
MemoryUses.push_back({&U, RMW->getValOperand()->getType()}); |
5081 |
MemoryUses.push_back({&U, RMW->getValOperand()->getType()}); |
| 5082 |
continue; |
5082 |
continue; |
| 5083 |
} |
5083 |
} |
| 5084 |
|
5084 |
|
| 5085 |
if (AtomicCmpXchgInst *CmpX = dyn_cast(UserI)) { |
5085 |
if (AtomicCmpXchgInst *CmpX = dyn_cast(UserI)) { |
| 5086 |
if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex()) |
5086 |
if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex()) |
| 5087 |
return true; // Storing addr, not into addr. |
5087 |
return true; // Storing addr, not into addr. |
| 5088 |
MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()}); |
5088 |
MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()}); |
| 5089 |
continue; |
5089 |
continue; |
| 5090 |
} |
5090 |
} |
| 5091 |
|
5091 |
|
| 5092 |
if (CallInst *CI = dyn_cast(UserI)) { |
5092 |
if (CallInst *CI = dyn_cast(UserI)) { |
| 5093 |
if (CI->hasFnAttr(Attribute::Cold)) { |
5093 |
if (CI->hasFnAttr(Attribute::Cold)) { |
| 5094 |
// If this is a cold call, we can sink the addressing calculation into |
5094 |
// If this is a cold call, we can sink the addressing calculation into |
| 5095 |
// the cold path. See optimizeCallInst |
5095 |
// the cold path. See optimizeCallInst |
| 5096 |
bool OptForSize = |
5096 |
bool OptForSize = |
| 5097 |
OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); |
5097 |
OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); |
| 5098 |
if (!OptForSize) |
5098 |
if (!OptForSize) |
| 5099 |
continue; |
5099 |
continue; |
| 5100 |
} |
5100 |
} |
| 5101 |
|
5101 |
|
| 5102 |
InlineAsm *IA = dyn_cast(CI->getCalledOperand()); |
5102 |
InlineAsm *IA = dyn_cast(CI->getCalledOperand()); |
| 5103 |
if (!IA) |
5103 |
if (!IA) |
| 5104 |
return true; |
5104 |
return true; |
| 5105 |
|
5105 |
|
| 5106 |
// If this is a memory operand, we're cool, otherwise bail out. |
5106 |
// If this is a memory operand, we're cool, otherwise bail out. |
| 5107 |
if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) |
5107 |
if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) |
| 5108 |
return true; |
5108 |
return true; |
| 5109 |
continue; |
5109 |
continue; |
| 5110 |
} |
5110 |
} |
| 5111 |
|
5111 |
|
| 5112 |
if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, |
5112 |
if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, |
| 5113 |
PSI, BFI, SeenInsts)) |
5113 |
PSI, BFI, SeenInsts)) |
| 5114 |
return true; |
5114 |
return true; |
| 5115 |
} |
5115 |
} |
| 5116 |
|
5116 |
|
| 5117 |
return false; |
5117 |
return false; |
| 5118 |
} |
5118 |
} |
| 5119 |
|
5119 |
|
| 5120 |
static bool FindAllMemoryUses( |
5120 |
static bool FindAllMemoryUses( |
| 5121 |
Instruction *I, SmallVectorImpl> &MemoryUses, |
5121 |
Instruction *I, SmallVectorImpl> &MemoryUses, |
| 5122 |
const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, |
5122 |
const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, |
| 5123 |
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { |
5123 |
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { |
| 5124 |
unsigned SeenInsts = 0; |
5124 |
unsigned SeenInsts = 0; |
| 5125 |
SmallPtrSet ConsideredInsts; |
5125 |
SmallPtrSet ConsideredInsts; |
| 5126 |
return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, |
5126 |
return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, |
| 5127 |
PSI, BFI, SeenInsts); |
5127 |
PSI, BFI, SeenInsts); |
| 5128 |
} |
5128 |
} |
| 5129 |
|
5129 |
|
| 5130 |
|
5130 |
|
| 5131 |
/// Return true if Val is already known to be live at the use site that we're |
5131 |
/// Return true if Val is already known to be live at the use site that we're |
| 5132 |
/// folding it into. If so, there is no cost to include it in the addressing |
5132 |
/// folding it into. If so, there is no cost to include it in the addressing |
| 5133 |
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the |
5133 |
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the |
| 5134 |
/// instruction already. |
5134 |
/// instruction already. |
| 5135 |
bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val, |
5135 |
bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val, |
| 5136 |
Value *KnownLive1, |
5136 |
Value *KnownLive1, |
| 5137 |
Value *KnownLive2) { |
5137 |
Value *KnownLive2) { |
| 5138 |
// If Val is either of the known-live values, we know it is live! |
5138 |
// If Val is either of the known-live values, we know it is live! |
| 5139 |
if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) |
5139 |
if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) |
| 5140 |
return true; |
5140 |
return true; |
| 5141 |
|
5141 |
|
| 5142 |
// All values other than instructions and arguments (e.g. constants) are live. |
5142 |
// All values other than instructions and arguments (e.g. constants) are live. |
| 5143 |
if (!isa(Val) && !isa(Val)) |
5143 |
if (!isa(Val) && !isa(Val)) |
| 5144 |
return true; |
5144 |
return true; |
| 5145 |
|
5145 |
|
| 5146 |
// If Val is a constant sized alloca in the entry block, it is live, this is |
5146 |
// If Val is a constant sized alloca in the entry block, it is live, this is |
| 5147 |
// true because it is just a reference to the stack/frame pointer, which is |
5147 |
// true because it is just a reference to the stack/frame pointer, which is |
| 5148 |
// live for the whole function. |
5148 |
// live for the whole function. |
| 5149 |
if (AllocaInst *AI = dyn_cast(Val)) |
5149 |
if (AllocaInst *AI = dyn_cast(Val)) |
| 5150 |
if (AI->isStaticAlloca()) |
5150 |
if (AI->isStaticAlloca()) |
| 5151 |
return true; |
5151 |
return true; |
| 5152 |
|
5152 |
|
| 5153 |
// Check to see if this value is already used in the memory instruction's |
5153 |
// Check to see if this value is already used in the memory instruction's |
| 5154 |
// block. If so, it's already live into the block at the very least, so we |
5154 |
// block. If so, it's already live into the block at the very least, so we |
| 5155 |
// can reasonably fold it. |
5155 |
// can reasonably fold it. |
| 5156 |
return Val->isUsedInBasicBlock(MemoryInst->getParent()); |
5156 |
return Val->isUsedInBasicBlock(MemoryInst->getParent()); |
| 5157 |
} |
5157 |
} |
| 5158 |
|
5158 |
|
| 5159 |
/// It is possible for the addressing mode of the machine to fold the specified |
5159 |
/// It is possible for the addressing mode of the machine to fold the specified |
| 5160 |
/// instruction into a load or store that ultimately uses it. |
5160 |
/// instruction into a load or store that ultimately uses it. |
| 5161 |
/// However, the specified instruction has multiple uses. |
5161 |
/// However, the specified instruction has multiple uses. |
| 5162 |
/// Given this, it may actually increase register pressure to fold it |
5162 |
/// Given this, it may actually increase register pressure to fold it |
| 5163 |
/// into the load. For example, consider this code: |
5163 |
/// into the load. For example, consider this code: |
| 5164 |
/// |
5164 |
/// |
| 5165 |
/// X = ... |
5165 |
/// X = ... |
| 5166 |
/// Y = X+1 |
5166 |
/// Y = X+1 |
| 5167 |
/// use(Y) -> nonload/store |
5167 |
/// use(Y) -> nonload/store |
| 5168 |
/// Z = Y+1 |
5168 |
/// Z = Y+1 |
| 5169 |
/// load Z |
5169 |
/// load Z |
| 5170 |
/// |
5170 |
/// |
| 5171 |
/// In this case, Y has multiple uses, and can be folded into the load of Z |
5171 |
/// In this case, Y has multiple uses, and can be folded into the load of Z |
| 5172 |
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to |
5172 |
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to |
| 5173 |
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one |
5173 |
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one |
| 5174 |
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the |
5174 |
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the |
| 5175 |
/// number of computations either. |
5175 |
/// number of computations either. |
| 5176 |
/// |
5176 |
/// |
| 5177 |
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If |
5177 |
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If |
| 5178 |
/// X was live across 'load Z' for other reasons, we actually *would* want to |
5178 |
/// X was live across 'load Z' for other reasons, we actually *would* want to |
| 5179 |
/// fold the addressing mode in the Z case. This would make Y die earlier. |
5179 |
/// fold the addressing mode in the Z case. This would make Y die earlier. |
| 5180 |
bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode( |
5180 |
bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode( |
| 5181 |
Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { |
5181 |
Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { |
| 5182 |
if (IgnoreProfitability) |
5182 |
if (IgnoreProfitability) |
| 5183 |
return true; |
5183 |
return true; |
| 5184 |
|
5184 |
|
| 5185 |
// AMBefore is the addressing mode before this instruction was folded into it, |
5185 |
// AMBefore is the addressing mode before this instruction was folded into it, |
| 5186 |
// and AMAfter is the addressing mode after the instruction was folded. Get |
5186 |
// and AMAfter is the addressing mode after the instruction was folded. Get |
| 5187 |
// the set of registers referenced by AMAfter and subtract out those |
5187 |
// the set of registers referenced by AMAfter and subtract out those |
| 5188 |
// referenced by AMBefore: this is the set of values which folding in this |
5188 |
// referenced by AMBefore: this is the set of values which folding in this |
| 5189 |
// address extends the lifetime of. |
5189 |
// address extends the lifetime of. |
| 5190 |
// |
5190 |
// |
| 5191 |
// Note that there are only two potential values being referenced here, |
5191 |
// Note that there are only two potential values being referenced here, |
| 5192 |
// BaseReg and ScaleReg (global addresses are always available, as are any |
5192 |
// BaseReg and ScaleReg (global addresses are always available, as are any |
| 5193 |
// folded immediates). |
5193 |
// folded immediates). |
| 5194 |
Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; |
5194 |
Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; |
| 5195 |
|
5195 |
|
| 5196 |
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their |
5196 |
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their |
| 5197 |
// lifetime wasn't extended by adding this instruction. |
5197 |
// lifetime wasn't extended by adding this instruction. |
| 5198 |
if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) |
5198 |
if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) |
| 5199 |
BaseReg = nullptr; |
5199 |
BaseReg = nullptr; |
| 5200 |
if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) |
5200 |
if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) |
| 5201 |
ScaledReg = nullptr; |
5201 |
ScaledReg = nullptr; |
| 5202 |
|
5202 |
|
| 5203 |
// If folding this instruction (and it's subexprs) didn't extend any live |
5203 |
// If folding this instruction (and it's subexprs) didn't extend any live |
| 5204 |
// ranges, we're ok with it. |
5204 |
// ranges, we're ok with it. |
| 5205 |
if (!BaseReg && !ScaledReg) |
5205 |
if (!BaseReg && !ScaledReg) |
| 5206 |
return true; |
5206 |
return true; |
| 5207 |
|
5207 |
|
| 5208 |
// If all uses of this instruction can have the address mode sunk into them, |
5208 |
// If all uses of this instruction can have the address mode sunk into them, |
| 5209 |
// we can remove the addressing mode and effectively trade one live register |
5209 |
// we can remove the addressing mode and effectively trade one live register |
| 5210 |
// for another (at worst.) In this context, folding an addressing mode into |
5210 |
// for another (at worst.) In this context, folding an addressing mode into |
| 5211 |
// the use is just a particularly nice way of sinking it. |
5211 |
// the use is just a particularly nice way of sinking it. |
| 5212 |
SmallVector, 16> MemoryUses; |
5212 |
SmallVector, 16> MemoryUses; |
| 5213 |
if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI)) |
5213 |
if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI)) |
| 5214 |
return false; // Has a non-memory, non-foldable use! |
5214 |
return false; // Has a non-memory, non-foldable use! |
| 5215 |
|
5215 |
|
| 5216 |
// Now that we know that all uses of this instruction are part of a chain of |
5216 |
// Now that we know that all uses of this instruction are part of a chain of |
| 5217 |
// computation involving only operations that could theoretically be folded |
5217 |
// computation involving only operations that could theoretically be folded |
| 5218 |
// into a memory use, loop over each of these memory operation uses and see |
5218 |
// into a memory use, loop over each of these memory operation uses and see |
| 5219 |
// if they could *actually* fold the instruction. The assumption is that |
5219 |
// if they could *actually* fold the instruction. The assumption is that |
| 5220 |
// addressing modes are cheap and that duplicating the computation involved |
5220 |
// addressing modes are cheap and that duplicating the computation involved |
| 5221 |
// many times is worthwhile, even on a fastpath. For sinking candidates |
5221 |
// many times is worthwhile, even on a fastpath. For sinking candidates |
| 5222 |
// (i.e. cold call sites), this serves as a way to prevent excessive code |
5222 |
// (i.e. cold call sites), this serves as a way to prevent excessive code |
| 5223 |
// growth since most architectures have some reasonable small and fast way to |
5223 |
// growth since most architectures have some reasonable small and fast way to |
| 5224 |
// compute an effective address. (i.e LEA on x86) |
5224 |
// compute an effective address. (i.e LEA on x86) |
| 5225 |
SmallVector MatchedAddrModeInsts; |
5225 |
SmallVector MatchedAddrModeInsts; |
| 5226 |
for (const std::pair |
5226 |
for (const std::pair |
| 5227 |
Value *Address = Pair.first->get(); |
5227 |
Value *Address = Pair.first->get(); |
| 5228 |
Instruction *UserI = cast(Pair.first->getUser()); |
5228 |
Instruction *UserI = cast(Pair.first->getUser()); |
| 5229 |
Type *AddressAccessTy = Pair.second; |
5229 |
Type *AddressAccessTy = Pair.second; |
| 5230 |
unsigned AS = Address->getType()->getPointerAddressSpace(); |
5230 |
unsigned AS = Address->getType()->getPointerAddressSpace(); |
| 5231 |
|
5231 |
|
| 5232 |
// Do a match against the root of this address, ignoring profitability. This |
5232 |
// Do a match against the root of this address, ignoring profitability. This |
| 5233 |
// will tell us if the addressing mode for the memory operation will |
5233 |
// will tell us if the addressing mode for the memory operation will |
| 5234 |
// *actually* cover the shared instruction. |
5234 |
// *actually* cover the shared instruction. |
| 5235 |
ExtAddrMode Result; |
5235 |
ExtAddrMode Result; |
| 5236 |
std::pair, int64_t> LargeOffsetGEP(nullptr, |
5236 |
std::pair, int64_t> LargeOffsetGEP(nullptr, |
| 5237 |
0); |
5237 |
0); |
| 5238 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
5238 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 5239 |
TPT.getRestorationPoint(); |
5239 |
TPT.getRestorationPoint(); |
| 5240 |
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn, |
5240 |
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn, |
| 5241 |
AddressAccessTy, AS, UserI, Result, |
5241 |
AddressAccessTy, AS, UserI, Result, |
| 5242 |
InsertedInsts, PromotedInsts, TPT, |
5242 |
InsertedInsts, PromotedInsts, TPT, |
| 5243 |
LargeOffsetGEP, OptSize, PSI, BFI); |
5243 |
LargeOffsetGEP, OptSize, PSI, BFI); |
| 5244 |
Matcher.IgnoreProfitability = true; |
5244 |
Matcher.IgnoreProfitability = true; |
| 5245 |
bool Success = Matcher.matchAddr(Address, 0); |
5245 |
bool Success = Matcher.matchAddr(Address, 0); |
| 5246 |
(void)Success; |
5246 |
(void)Success; |
| 5247 |
assert(Success && "Couldn't select *anything*?"); |
5247 |
assert(Success && "Couldn't select *anything*?"); |
| 5248 |
|
5248 |
|
| 5249 |
// The match was to check the profitability, the changes made are not |
5249 |
// The match was to check the profitability, the changes made are not |
| 5250 |
// part of the original matcher. Therefore, they should be dropped |
5250 |
// part of the original matcher. Therefore, they should be dropped |
| 5251 |
// otherwise the original matcher will not present the right state. |
5251 |
// otherwise the original matcher will not present the right state. |
| 5252 |
TPT.rollback(LastKnownGood); |
5252 |
TPT.rollback(LastKnownGood); |
| 5253 |
|
5253 |
|
| 5254 |
// If the match didn't cover I, then it won't be shared by it. |
5254 |
// If the match didn't cover I, then it won't be shared by it. |
| 5255 |
if (!is_contained(MatchedAddrModeInsts, I)) |
5255 |
if (!is_contained(MatchedAddrModeInsts, I)) |
| 5256 |
return false; |
5256 |
return false; |
| 5257 |
|
5257 |
|
| 5258 |
MatchedAddrModeInsts.clear(); |
5258 |
MatchedAddrModeInsts.clear(); |
| 5259 |
} |
5259 |
} |
| 5260 |
|
5260 |
|
| 5261 |
return true; |
5261 |
return true; |
| 5262 |
} |
5262 |
} |
| 5263 |
|
5263 |
|
| 5264 |
/// Return true if the specified values are defined in a |
5264 |
/// Return true if the specified values are defined in a |
| 5265 |
/// different basic block than BB. |
5265 |
/// different basic block than BB. |
| 5266 |
static bool IsNonLocalValue(Value *V, BasicBlock *BB) { |
5266 |
static bool IsNonLocalValue(Value *V, BasicBlock *BB) { |
| 5267 |
if (Instruction *I = dyn_cast(V)) |
5267 |
if (Instruction *I = dyn_cast(V)) |
| 5268 |
return I->getParent() != BB; |
5268 |
return I->getParent() != BB; |
| 5269 |
return false; |
5269 |
return false; |
| 5270 |
} |
5270 |
} |
| 5271 |
|
5271 |
|
| 5272 |
/// Sink addressing mode computation immediate before MemoryInst if doing so |
5272 |
/// Sink addressing mode computation immediate before MemoryInst if doing so |
| 5273 |
/// can be done without increasing register pressure. The need for the |
5273 |
/// can be done without increasing register pressure. The need for the |
| 5274 |
/// register pressure constraint means this can end up being an all or nothing |
5274 |
/// register pressure constraint means this can end up being an all or nothing |
| 5275 |
/// decision for all uses of the same addressing computation. |
5275 |
/// decision for all uses of the same addressing computation. |
| 5276 |
/// |
5276 |
/// |
| 5277 |
/// Load and Store Instructions often have addressing modes that can do |
5277 |
/// Load and Store Instructions often have addressing modes that can do |
| 5278 |
/// significant amounts of computation. As such, instruction selection will try |
5278 |
/// significant amounts of computation. As such, instruction selection will try |
| 5279 |
/// to get the load or store to do as much computation as possible for the |
5279 |
/// to get the load or store to do as much computation as possible for the |
| 5280 |
/// program. The problem is that isel can only see within a single block. As |
5280 |
/// program. The problem is that isel can only see within a single block. As |
| 5281 |
/// such, we sink as much legal addressing mode work into the block as possible. |
5281 |
/// such, we sink as much legal addressing mode work into the block as possible. |
| 5282 |
/// |
5282 |
/// |
| 5283 |
/// This method is used to optimize both load/store and inline asms with memory |
5283 |
/// This method is used to optimize both load/store and inline asms with memory |
| 5284 |
/// operands. It's also used to sink addressing computations feeding into cold |
5284 |
/// operands. It's also used to sink addressing computations feeding into cold |
| 5285 |
/// call sites into their (cold) basic block. |
5285 |
/// call sites into their (cold) basic block. |
| 5286 |
/// |
5286 |
/// |
| 5287 |
/// The motivation for handling sinking into cold blocks is that doing so can |
5287 |
/// The motivation for handling sinking into cold blocks is that doing so can |
| 5288 |
/// both enable other address mode sinking (by satisfying the register pressure |
5288 |
/// both enable other address mode sinking (by satisfying the register pressure |
| 5289 |
/// constraint above), and reduce register pressure globally (by removing the |
5289 |
/// constraint above), and reduce register pressure globally (by removing the |
| 5290 |
/// addressing mode computation from the fast path entirely.). |
5290 |
/// addressing mode computation from the fast path entirely.). |
| 5291 |
bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, |
5291 |
bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, |
| 5292 |
Type *AccessTy, unsigned AddrSpace) { |
5292 |
Type *AccessTy, unsigned AddrSpace) { |
| 5293 |
Value *Repl = Addr; |
5293 |
Value *Repl = Addr; |
| 5294 |
|
5294 |
|
| 5295 |
// Try to collapse single-value PHI nodes. This is necessary to undo |
5295 |
// Try to collapse single-value PHI nodes. This is necessary to undo |
| 5296 |
// unprofitable PRE transformations. |
5296 |
// unprofitable PRE transformations. |
| 5297 |
SmallVector worklist; |
5297 |
SmallVector worklist; |
| 5298 |
SmallPtrSet Visited; |
5298 |
SmallPtrSet Visited; |
| 5299 |
worklist.push_back(Addr); |
5299 |
worklist.push_back(Addr); |
| 5300 |
|
5300 |
|
| 5301 |
// Use a worklist to iteratively look through PHI and select nodes, and |
5301 |
// Use a worklist to iteratively look through PHI and select nodes, and |
| 5302 |
// ensure that the addressing mode obtained from the non-PHI/select roots of |
5302 |
// ensure that the addressing mode obtained from the non-PHI/select roots of |
| 5303 |
// the graph are compatible. |
5303 |
// the graph are compatible. |
| 5304 |
bool PhiOrSelectSeen = false; |
5304 |
bool PhiOrSelectSeen = false; |
| 5305 |
SmallVector AddrModeInsts; |
5305 |
SmallVector AddrModeInsts; |
| 5306 |
const SimplifyQuery SQ(*DL, TLInfo); |
5306 |
const SimplifyQuery SQ(*DL, TLInfo); |
| 5307 |
AddressingModeCombiner AddrModes(SQ, Addr); |
5307 |
AddressingModeCombiner AddrModes(SQ, Addr); |
| 5308 |
TypePromotionTransaction TPT(RemovedInsts); |
5308 |
TypePromotionTransaction TPT(RemovedInsts); |
| 5309 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
5309 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 5310 |
TPT.getRestorationPoint(); |
5310 |
TPT.getRestorationPoint(); |
| 5311 |
while (!worklist.empty()) { |
5311 |
while (!worklist.empty()) { |
| 5312 |
Value *V = worklist.pop_back_val(); |
5312 |
Value *V = worklist.pop_back_val(); |
| 5313 |
|
5313 |
|
| 5314 |
// We allow traversing cyclic Phi nodes. |
5314 |
// We allow traversing cyclic Phi nodes. |
| 5315 |
// In case of success after this loop we ensure that traversing through |
5315 |
// In case of success after this loop we ensure that traversing through |
| 5316 |
// Phi nodes ends up with all cases to compute address of the form |
5316 |
// Phi nodes ends up with all cases to compute address of the form |
| 5317 |
// BaseGV + Base + Scale * Index + Offset |
5317 |
// BaseGV + Base + Scale * Index + Offset |
| 5318 |
// where Scale and Offset are constans and BaseGV, Base and Index |
5318 |
// where Scale and Offset are constans and BaseGV, Base and Index |
| 5319 |
// are exactly the same Values in all cases. |
5319 |
// are exactly the same Values in all cases. |
| 5320 |
// It means that BaseGV, Scale and Offset dominate our memory instruction |
5320 |
// It means that BaseGV, Scale and Offset dominate our memory instruction |
| 5321 |
// and have the same value as they had in address computation represented |
5321 |
// and have the same value as they had in address computation represented |
| 5322 |
// as Phi. So we can safely sink address computation to memory instruction. |
5322 |
// as Phi. So we can safely sink address computation to memory instruction. |
| 5323 |
if (!Visited.insert(V).second) |
5323 |
if (!Visited.insert(V).second) |
| 5324 |
continue; |
5324 |
continue; |
| 5325 |
|
5325 |
|
| 5326 |
// For a PHI node, push all of its incoming values. |
5326 |
// For a PHI node, push all of its incoming values. |
| 5327 |
if (PHINode *P = dyn_cast(V)) { |
5327 |
if (PHINode *P = dyn_cast(V)) { |
| 5328 |
append_range(worklist, P->incoming_values()); |
5328 |
append_range(worklist, P->incoming_values()); |
| 5329 |
PhiOrSelectSeen = true; |
5329 |
PhiOrSelectSeen = true; |
| 5330 |
continue; |
5330 |
continue; |
| 5331 |
} |
5331 |
} |
| 5332 |
// Similar for select. |
5332 |
// Similar for select. |
| 5333 |
if (SelectInst *SI = dyn_cast(V)) { |
5333 |
if (SelectInst *SI = dyn_cast(V)) { |
| 5334 |
worklist.push_back(SI->getFalseValue()); |
5334 |
worklist.push_back(SI->getFalseValue()); |
| 5335 |
worklist.push_back(SI->getTrueValue()); |
5335 |
worklist.push_back(SI->getTrueValue()); |
| 5336 |
PhiOrSelectSeen = true; |
5336 |
PhiOrSelectSeen = true; |
| 5337 |
continue; |
5337 |
continue; |
| 5338 |
} |
5338 |
} |
| 5339 |
|
5339 |
|
| 5340 |
// For non-PHIs, determine the addressing mode being computed. Note that |
5340 |
// For non-PHIs, determine the addressing mode being computed. Note that |
| 5341 |
// the result may differ depending on what other uses our candidate |
5341 |
// the result may differ depending on what other uses our candidate |
| 5342 |
// addressing instructions might have. |
5342 |
// addressing instructions might have. |
| 5343 |
AddrModeInsts.clear(); |
5343 |
AddrModeInsts.clear(); |
| 5344 |
std::pair, int64_t> LargeOffsetGEP(nullptr, |
5344 |
std::pair, int64_t> LargeOffsetGEP(nullptr, |
| 5345 |
0); |
5345 |
0); |
| 5346 |
// Defer the query (and possible computation of) the dom tree to point of |
5346 |
// Defer the query (and possible computation of) the dom tree to point of |
| 5347 |
// actual use. It's expected that most address matches don't actually need |
5347 |
// actual use. It's expected that most address matches don't actually need |
| 5348 |
// the domtree. |
5348 |
// the domtree. |
| 5349 |
auto getDTFn = [MemoryInst, this]() -> const DominatorTree & { |
5349 |
auto getDTFn = [MemoryInst, this]() -> const DominatorTree & { |
| 5350 |
Function *F = MemoryInst->getParent()->getParent(); |
5350 |
Function *F = MemoryInst->getParent()->getParent(); |
| 5351 |
return this->getDT(*F); |
5351 |
return this->getDT(*F); |
| 5352 |
}; |
5352 |
}; |
| 5353 |
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( |
5353 |
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( |
| 5354 |
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn, |
5354 |
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn, |
| 5355 |
*TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, |
5355 |
*TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, |
| 5356 |
BFI.get()); |
5356 |
BFI.get()); |
| 5357 |
|
5357 |
|
| 5358 |
GetElementPtrInst *GEP = LargeOffsetGEP.first; |
5358 |
GetElementPtrInst *GEP = LargeOffsetGEP.first; |
| 5359 |
if (GEP && !NewGEPBases.count(GEP)) { |
5359 |
if (GEP && !NewGEPBases.count(GEP)) { |
| 5360 |
// If splitting the underlying data structure can reduce the offset of a |
5360 |
// If splitting the underlying data structure can reduce the offset of a |
| 5361 |
// GEP, collect the GEP. Skip the GEPs that are the new bases of |
5361 |
// GEP, collect the GEP. Skip the GEPs that are the new bases of |
| 5362 |
// previously split data structures. |
5362 |
// previously split data structures. |
| 5363 |
LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); |
5363 |
LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); |
| 5364 |
LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size())); |
5364 |
LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size())); |
| 5365 |
} |
5365 |
} |
| 5366 |
|
5366 |
|
| 5367 |
NewAddrMode.OriginalValue = V; |
5367 |
NewAddrMode.OriginalValue = V; |
| 5368 |
if (!AddrModes.addNewAddrMode(NewAddrMode)) |
5368 |
if (!AddrModes.addNewAddrMode(NewAddrMode)) |
| 5369 |
break; |
5369 |
break; |
| 5370 |
} |
5370 |
} |
| 5371 |
|
5371 |
|
| 5372 |
// Try to combine the AddrModes we've collected. If we couldn't collect any, |
5372 |
// Try to combine the AddrModes we've collected. If we couldn't collect any, |
| 5373 |
// or we have multiple but either couldn't combine them or combining them |
5373 |
// or we have multiple but either couldn't combine them or combining them |
| 5374 |
// wouldn't do anything useful, bail out now. |
5374 |
// wouldn't do anything useful, bail out now. |
| 5375 |
if (!AddrModes.combineAddrModes()) { |
5375 |
if (!AddrModes.combineAddrModes()) { |
| 5376 |
TPT.rollback(LastKnownGood); |
5376 |
TPT.rollback(LastKnownGood); |
| 5377 |
return false; |
5377 |
return false; |
| 5378 |
} |
5378 |
} |
| 5379 |
bool Modified = TPT.commit(); |
5379 |
bool Modified = TPT.commit(); |
| 5380 |
|
5380 |
|
| 5381 |
// Get the combined AddrMode (or the only AddrMode, if we only had one). |
5381 |
// Get the combined AddrMode (or the only AddrMode, if we only had one). |
| 5382 |
ExtAddrMode AddrMode = AddrModes.getAddrMode(); |
5382 |
ExtAddrMode AddrMode = AddrModes.getAddrMode(); |
| 5383 |
|
5383 |
|
| 5384 |
// If all the instructions matched are already in this BB, don't do anything. |
5384 |
// If all the instructions matched are already in this BB, don't do anything. |
| 5385 |
// If we saw a Phi node then it is not local definitely, and if we saw a |
5385 |
// If we saw a Phi node then it is not local definitely, and if we saw a |
| 5386 |
// select then we want to push the address calculation past it even if it's |
5386 |
// select then we want to push the address calculation past it even if it's |
| 5387 |
// already in this BB. |
5387 |
// already in this BB. |
| 5388 |
if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { |
5388 |
if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { |
| 5389 |
return IsNonLocalValue(V, MemoryInst->getParent()); |
5389 |
return IsNonLocalValue(V, MemoryInst->getParent()); |
| 5390 |
})) { |
5390 |
})) { |
| 5391 |
LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode |
5391 |
LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode |
| 5392 |
<< "\n"); |
5392 |
<< "\n"); |
| 5393 |
return Modified; |
5393 |
return Modified; |
| 5394 |
} |
5394 |
} |
| 5395 |
|
5395 |
|
| 5396 |
// Insert this computation right after this user. Since our caller is |
5396 |
// Insert this computation right after this user. Since our caller is |
| 5397 |
// scanning from the top of the BB to the bottom, reuse of the expr are |
5397 |
// scanning from the top of the BB to the bottom, reuse of the expr are |
| 5398 |
// guaranteed to happen later. |
5398 |
// guaranteed to happen later. |
| 5399 |
IRBuilder<> Builder(MemoryInst); |
5399 |
IRBuilder<> Builder(MemoryInst); |
| 5400 |
|
5400 |
|
| 5401 |
// Now that we determined the addressing expression we want to use and know |
5401 |
// Now that we determined the addressing expression we want to use and know |
| 5402 |
// that we have to sink it into this block. Check to see if we have already |
5402 |
// that we have to sink it into this block. Check to see if we have already |
| 5403 |
// done this for some other load/store instr in this block. If so, reuse |
5403 |
// done this for some other load/store instr in this block. If so, reuse |
| 5404 |
// the computation. Before attempting reuse, check if the address is valid |
5404 |
// the computation. Before attempting reuse, check if the address is valid |
| 5405 |
// as it may have been erased. |
5405 |
// as it may have been erased. |
| 5406 |
|
5406 |
|
| 5407 |
WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; |
5407 |
WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; |
| 5408 |
|
5408 |
|
| 5409 |
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; |
5409 |
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; |
| 5410 |
Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); |
5410 |
Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); |
| 5411 |
if (SunkAddr) { |
5411 |
if (SunkAddr) { |
| 5412 |
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode |
5412 |
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode |
| 5413 |
<< " for " << *MemoryInst << "\n"); |
5413 |
<< " for " << *MemoryInst << "\n"); |
| 5414 |
if (SunkAddr->getType() != Addr->getType()) { |
5414 |
if (SunkAddr->getType() != Addr->getType()) { |
| 5415 |
if (SunkAddr->getType()->getPointerAddressSpace() != |
5415 |
if (SunkAddr->getType()->getPointerAddressSpace() != |
| 5416 |
Addr->getType()->getPointerAddressSpace() && |
5416 |
Addr->getType()->getPointerAddressSpace() && |
| 5417 |
!DL->isNonIntegralPointerType(Addr->getType())) { |
5417 |
!DL->isNonIntegralPointerType(Addr->getType())) { |
| 5418 |
// There are two reasons the address spaces might not match: a no-op |
5418 |
// There are two reasons the address spaces might not match: a no-op |
| 5419 |
// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a |
5419 |
// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a |
| 5420 |
// ptrtoint/inttoptr pair to ensure we match the original semantics. |
5420 |
// ptrtoint/inttoptr pair to ensure we match the original semantics. |
| 5421 |
// TODO: allow bitcast between different address space pointers with the |
5421 |
// TODO: allow bitcast between different address space pointers with the |
| 5422 |
// same size. |
5422 |
// same size. |
| 5423 |
SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); |
5423 |
SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); |
| 5424 |
SunkAddr = |
5424 |
SunkAddr = |
| 5425 |
Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); |
5425 |
Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); |
| 5426 |
} else |
5426 |
} else |
| 5427 |
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); |
5427 |
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); |
| 5428 |
} |
5428 |
} |
| 5429 |
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && |
5429 |
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && |
| 5430 |
SubtargetInfo->addrSinkUsingGEPs())) { |
5430 |
SubtargetInfo->addrSinkUsingGEPs())) { |
| 5431 |
// By default, we use the GEP-based method when AA is used later. This |
5431 |
// By default, we use the GEP-based method when AA is used later. This |
| 5432 |
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. |
5432 |
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. |
| 5433 |
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode |
5433 |
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode |
| 5434 |
<< " for " << *MemoryInst << "\n"); |
5434 |
<< " for " << *MemoryInst << "\n"); |
| 5435 |
Value *ResultPtr = nullptr, *ResultIndex = nullptr; |
5435 |
Value *ResultPtr = nullptr, *ResultIndex = nullptr; |
| 5436 |
|
5436 |
|
| 5437 |
// First, find the pointer. |
5437 |
// First, find the pointer. |
| 5438 |
if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { |
5438 |
if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { |
| 5439 |
ResultPtr = AddrMode.BaseReg; |
5439 |
ResultPtr = AddrMode.BaseReg; |
| 5440 |
AddrMode.BaseReg = nullptr; |
5440 |
AddrMode.BaseReg = nullptr; |
| 5441 |
} |
5441 |
} |
| 5442 |
|
5442 |
|
| 5443 |
if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { |
5443 |
if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { |
| 5444 |
// We can't add more than one pointer together, nor can we scale a |
5444 |
// We can't add more than one pointer together, nor can we scale a |
| 5445 |
// pointer (both of which seem meaningless). |
5445 |
// pointer (both of which seem meaningless). |
| 5446 |
if (ResultPtr || AddrMode.Scale != 1) |
5446 |
if (ResultPtr || AddrMode.Scale != 1) |
| 5447 |
return Modified; |
5447 |
return Modified; |
| 5448 |
|
5448 |
|
| 5449 |
ResultPtr = AddrMode.ScaledReg; |
5449 |
ResultPtr = AddrMode.ScaledReg; |
| 5450 |
AddrMode.Scale = 0; |
5450 |
AddrMode.Scale = 0; |
| 5451 |
} |
5451 |
} |
| 5452 |
|
5452 |
|
| 5453 |
// It is only safe to sign extend the BaseReg if we know that the math |
5453 |
// It is only safe to sign extend the BaseReg if we know that the math |
| 5454 |
// required to create it did not overflow before we extend it. Since |
5454 |
// required to create it did not overflow before we extend it. Since |
| 5455 |
// the original IR value was tossed in favor of a constant back when |
5455 |
// the original IR value was tossed in favor of a constant back when |
| 5456 |
// the AddrMode was created we need to bail out gracefully if widths |
5456 |
// the AddrMode was created we need to bail out gracefully if widths |
| 5457 |
// do not match instead of extending it. |
5457 |
// do not match instead of extending it. |
| 5458 |
// |
5458 |
// |
| 5459 |
// (See below for code to add the scale.) |
5459 |
// (See below for code to add the scale.) |
| 5460 |
if (AddrMode.Scale) { |
5460 |
if (AddrMode.Scale) { |
| 5461 |
Type *ScaledRegTy = AddrMode.ScaledReg->getType(); |
5461 |
Type *ScaledRegTy = AddrMode.ScaledReg->getType(); |
| 5462 |
if (cast(IntPtrTy)->getBitWidth() > |
5462 |
if (cast(IntPtrTy)->getBitWidth() > |
| 5463 |
cast(ScaledRegTy)->getBitWidth()) |
5463 |
cast(ScaledRegTy)->getBitWidth()) |
| 5464 |
return Modified; |
5464 |
return Modified; |
| 5465 |
} |
5465 |
} |
| 5466 |
|
5466 |
|
| 5467 |
if (AddrMode.BaseGV) { |
5467 |
if (AddrMode.BaseGV) { |
| 5468 |
if (ResultPtr) |
5468 |
if (ResultPtr) |
| 5469 |
return Modified; |
5469 |
return Modified; |
| 5470 |
|
5470 |
|
| 5471 |
ResultPtr = AddrMode.BaseGV; |
5471 |
ResultPtr = AddrMode.BaseGV; |
| 5472 |
} |
5472 |
} |
| 5473 |
|
5473 |
|
| 5474 |
// If the real base value actually came from an inttoptr, then the matcher |
5474 |
// If the real base value actually came from an inttoptr, then the matcher |
| 5475 |
// will look through it and provide only the integer value. In that case, |
5475 |
// will look through it and provide only the integer value. In that case, |
| 5476 |
// use it here. |
5476 |
// use it here. |
| 5477 |
if (!DL->isNonIntegralPointerType(Addr->getType())) { |
5477 |
if (!DL->isNonIntegralPointerType(Addr->getType())) { |
| 5478 |
if (!ResultPtr && AddrMode.BaseReg) { |
5478 |
if (!ResultPtr && AddrMode.BaseReg) { |
| 5479 |
ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), |
5479 |
ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), |
| 5480 |
"sunkaddr"); |
5480 |
"sunkaddr"); |
| 5481 |
AddrMode.BaseReg = nullptr; |
5481 |
AddrMode.BaseReg = nullptr; |
| 5482 |
} else if (!ResultPtr && AddrMode.Scale == 1) { |
5482 |
} else if (!ResultPtr && AddrMode.Scale == 1) { |
| 5483 |
ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), |
5483 |
ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), |
| 5484 |
"sunkaddr"); |
5484 |
"sunkaddr"); |
| 5485 |
AddrMode.Scale = 0; |
5485 |
AddrMode.Scale = 0; |
| 5486 |
} |
5486 |
} |
| 5487 |
} |
5487 |
} |
| 5488 |
|
5488 |
|
| 5489 |
if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale && |
5489 |
if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale && |
| 5490 |
!AddrMode.BaseOffs) { |
5490 |
!AddrMode.BaseOffs) { |
| 5491 |
SunkAddr = Constant::getNullValue(Addr->getType()); |
5491 |
SunkAddr = Constant::getNullValue(Addr->getType()); |
| 5492 |
} else if (!ResultPtr) { |
5492 |
} else if (!ResultPtr) { |
| 5493 |
return Modified; |
5493 |
return Modified; |
| 5494 |
} else { |
5494 |
} else { |
| 5495 |
Type *I8PtrTy = |
5495 |
Type *I8PtrTy = |
| 5496 |
Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); |
5496 |
Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); |
| 5497 |
Type *I8Ty = Builder.getInt8Ty(); |
5497 |
Type *I8Ty = Builder.getInt8Ty(); |
| 5498 |
|
5498 |
|
| 5499 |
// Start with the base register. Do this first so that subsequent address |
5499 |
// Start with the base register. Do this first so that subsequent address |
| 5500 |
// matching finds it last, which will prevent it from trying to match it |
5500 |
// matching finds it last, which will prevent it from trying to match it |
| 5501 |
// as the scaled value in case it happens to be a mul. That would be |
5501 |
// as the scaled value in case it happens to be a mul. That would be |
| 5502 |
// problematic if we've sunk a different mul for the scale, because then |
5502 |
// problematic if we've sunk a different mul for the scale, because then |
| 5503 |
// we'd end up sinking both muls. |
5503 |
// we'd end up sinking both muls. |
| 5504 |
if (AddrMode.BaseReg) { |
5504 |
if (AddrMode.BaseReg) { |
| 5505 |
Value *V = AddrMode.BaseReg; |
5505 |
Value *V = AddrMode.BaseReg; |
| 5506 |
if (V->getType() != IntPtrTy) |
5506 |
if (V->getType() != IntPtrTy) |
| 5507 |
V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); |
5507 |
V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); |
| 5508 |
|
5508 |
|
| 5509 |
ResultIndex = V; |
5509 |
ResultIndex = V; |
| 5510 |
} |
5510 |
} |
| 5511 |
|
5511 |
|
| 5512 |
// Add the scale value. |
5512 |
// Add the scale value. |
| 5513 |
if (AddrMode.Scale) { |
5513 |
if (AddrMode.Scale) { |
| 5514 |
Value *V = AddrMode.ScaledReg; |
5514 |
Value *V = AddrMode.ScaledReg; |
| 5515 |
if (V->getType() == IntPtrTy) { |
5515 |
if (V->getType() == IntPtrTy) { |
| 5516 |
// done. |
5516 |
// done. |
| 5517 |
} else { |
5517 |
} else { |
| 5518 |
assert(cast(IntPtrTy)->getBitWidth() < |
5518 |
assert(cast(IntPtrTy)->getBitWidth() < |
| 5519 |
cast(V->getType())->getBitWidth() && |
5519 |
cast(V->getType())->getBitWidth() && |
| 5520 |
"We can't transform if ScaledReg is too narrow"); |
5520 |
"We can't transform if ScaledReg is too narrow"); |
| 5521 |
V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); |
5521 |
V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); |
| 5522 |
} |
5522 |
} |
| 5523 |
|
5523 |
|
| 5524 |
if (AddrMode.Scale != 1) |
5524 |
if (AddrMode.Scale != 1) |
| 5525 |
V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), |
5525 |
V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), |
| 5526 |
"sunkaddr"); |
5526 |
"sunkaddr"); |
| 5527 |
if (ResultIndex) |
5527 |
if (ResultIndex) |
| 5528 |
ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); |
5528 |
ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); |
| 5529 |
else |
5529 |
else |
| 5530 |
ResultIndex = V; |
5530 |
ResultIndex = V; |
| 5531 |
} |
5531 |
} |
| 5532 |
|
5532 |
|
| 5533 |
// Add in the Base Offset if present. |
5533 |
// Add in the Base Offset if present. |
| 5534 |
if (AddrMode.BaseOffs) { |
5534 |
if (AddrMode.BaseOffs) { |
| 5535 |
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); |
5535 |
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); |
| 5536 |
if (ResultIndex) { |
5536 |
if (ResultIndex) { |
| 5537 |
// We need to add this separately from the scale above to help with |
5537 |
// We need to add this separately from the scale above to help with |
| 5538 |
// SDAG consecutive load/store merging. |
5538 |
// SDAG consecutive load/store merging. |
| 5539 |
if (ResultPtr->getType() != I8PtrTy) |
5539 |
if (ResultPtr->getType() != I8PtrTy) |
| 5540 |
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); |
5540 |
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); |
| 5541 |
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, |
5541 |
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, |
| 5542 |
"sunkaddr", AddrMode.InBounds); |
5542 |
"sunkaddr", AddrMode.InBounds); |
| 5543 |
} |
5543 |
} |
| 5544 |
|
5544 |
|
| 5545 |
ResultIndex = V; |
5545 |
ResultIndex = V; |
| 5546 |
} |
5546 |
} |
| 5547 |
|
5547 |
|
| 5548 |
if (!ResultIndex) { |
5548 |
if (!ResultIndex) { |
| 5549 |
SunkAddr = ResultPtr; |
5549 |
SunkAddr = ResultPtr; |
| 5550 |
} else { |
5550 |
} else { |
| 5551 |
if (ResultPtr->getType() != I8PtrTy) |
5551 |
if (ResultPtr->getType() != I8PtrTy) |
| 5552 |
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); |
5552 |
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); |
| 5553 |
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr", |
5553 |
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr", |
| 5554 |
AddrMode.InBounds); |
5554 |
AddrMode.InBounds); |
| 5555 |
} |
5555 |
} |
| 5556 |
|
5556 |
|
| 5557 |
if (SunkAddr->getType() != Addr->getType()) { |
5557 |
if (SunkAddr->getType() != Addr->getType()) { |
| 5558 |
if (SunkAddr->getType()->getPointerAddressSpace() != |
5558 |
if (SunkAddr->getType()->getPointerAddressSpace() != |
| 5559 |
Addr->getType()->getPointerAddressSpace() && |
5559 |
Addr->getType()->getPointerAddressSpace() && |
| 5560 |
!DL->isNonIntegralPointerType(Addr->getType())) { |
5560 |
!DL->isNonIntegralPointerType(Addr->getType())) { |
| 5561 |
// There are two reasons the address spaces might not match: a no-op |
5561 |
// There are two reasons the address spaces might not match: a no-op |
| 5562 |
// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a |
5562 |
// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a |
| 5563 |
// ptrtoint/inttoptr pair to ensure we match the original semantics. |
5563 |
// ptrtoint/inttoptr pair to ensure we match the original semantics. |
| 5564 |
// TODO: allow bitcast between different address space pointers with |
5564 |
// TODO: allow bitcast between different address space pointers with |
| 5565 |
// the same size. |
5565 |
// the same size. |
| 5566 |
SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); |
5566 |
SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); |
| 5567 |
SunkAddr = |
5567 |
SunkAddr = |
| 5568 |
Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); |
5568 |
Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); |
| 5569 |
} else |
5569 |
} else |
| 5570 |
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); |
5570 |
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); |
| 5571 |
} |
5571 |
} |
| 5572 |
} |
5572 |
} |
| 5573 |
} else { |
5573 |
} else { |
| 5574 |
// We'd require a ptrtoint/inttoptr down the line, which we can't do for |
5574 |
// We'd require a ptrtoint/inttoptr down the line, which we can't do for |
| 5575 |
// non-integral pointers, so in that case bail out now. |
5575 |
// non-integral pointers, so in that case bail out now. |
| 5576 |
Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr; |
5576 |
Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr; |
| 5577 |
Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr; |
5577 |
Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr; |
| 5578 |
PointerType *BasePtrTy = dyn_cast_or_null(BaseTy); |
5578 |
PointerType *BasePtrTy = dyn_cast_or_null(BaseTy); |
| 5579 |
PointerType *ScalePtrTy = dyn_cast_or_null(ScaleTy); |
5579 |
PointerType *ScalePtrTy = dyn_cast_or_null(ScaleTy); |
| 5580 |
if (DL->isNonIntegralPointerType(Addr->getType()) || |
5580 |
if (DL->isNonIntegralPointerType(Addr->getType()) || |
| 5581 |
(BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) || |
5581 |
(BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) || |
| 5582 |
(ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || |
5582 |
(ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || |
| 5583 |
(AddrMode.BaseGV && |
5583 |
(AddrMode.BaseGV && |
| 5584 |
DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) |
5584 |
DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) |
| 5585 |
return Modified; |
5585 |
return Modified; |
| 5586 |
|
5586 |
|
| 5587 |
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode |
5587 |
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode |
| 5588 |
<< " for " << *MemoryInst << "\n"); |
5588 |
<< " for " << *MemoryInst << "\n"); |
| 5589 |
Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); |
5589 |
Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); |
| 5590 |
Value *Result = nullptr; |
5590 |
Value *Result = nullptr; |
| 5591 |
|
5591 |
|
| 5592 |
// Start with the base register. Do this first so that subsequent address |
5592 |
// Start with the base register. Do this first so that subsequent address |
| 5593 |
// matching finds it last, which will prevent it from trying to match it |
5593 |
// matching finds it last, which will prevent it from trying to match it |
| 5594 |
// as the scaled value in case it happens to be a mul. That would be |
5594 |
// as the scaled value in case it happens to be a mul. That would be |
| 5595 |
// problematic if we've sunk a different mul for the scale, because then |
5595 |
// problematic if we've sunk a different mul for the scale, because then |
| 5596 |
// we'd end up sinking both muls. |
5596 |
// we'd end up sinking both muls. |
| 5597 |
if (AddrMode.BaseReg) { |
5597 |
if (AddrMode.BaseReg) { |
| 5598 |
Value *V = AddrMode.BaseReg; |
5598 |
Value *V = AddrMode.BaseReg; |
| 5599 |
if (V->getType()->isPointerTy()) |
5599 |
if (V->getType()->isPointerTy()) |
| 5600 |
V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); |
5600 |
V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); |
| 5601 |
if (V->getType() != IntPtrTy) |
5601 |
if (V->getType() != IntPtrTy) |
| 5602 |
V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); |
5602 |
V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); |
| 5603 |
Result = V; |
5603 |
Result = V; |
| 5604 |
} |
5604 |
} |
| 5605 |
|
5605 |
|
| 5606 |
// Add the scale value. |
5606 |
// Add the scale value. |
| 5607 |
if (AddrMode.Scale) { |
5607 |
if (AddrMode.Scale) { |
| 5608 |
Value *V = AddrMode.ScaledReg; |
5608 |
Value *V = AddrMode.ScaledReg; |
| 5609 |
if (V->getType() == IntPtrTy) { |
5609 |
if (V->getType() == IntPtrTy) { |
| 5610 |
// done. |
5610 |
// done. |
| 5611 |
} else if (V->getType()->isPointerTy()) { |
5611 |
} else if (V->getType()->isPointerTy()) { |
| 5612 |
V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); |
5612 |
V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); |
| 5613 |
} else if (cast(IntPtrTy)->getBitWidth() < |
5613 |
} else if (cast(IntPtrTy)->getBitWidth() < |
| 5614 |
cast(V->getType())->getBitWidth()) { |
5614 |
cast(V->getType())->getBitWidth()) { |
| 5615 |
V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); |
5615 |
V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); |
| 5616 |
} else { |
5616 |
} else { |
| 5617 |
// It is only safe to sign extend the BaseReg if we know that the math |
5617 |
// It is only safe to sign extend the BaseReg if we know that the math |
| 5618 |
// required to create it did not overflow before we extend it. Since |
5618 |
// required to create it did not overflow before we extend it. Since |
| 5619 |
// the original IR value was tossed in favor of a constant back when |
5619 |
// the original IR value was tossed in favor of a constant back when |
| 5620 |
// the AddrMode was created we need to bail out gracefully if widths |
5620 |
// the AddrMode was created we need to bail out gracefully if widths |
| 5621 |
// do not match instead of extending it. |
5621 |
// do not match instead of extending it. |
| 5622 |
Instruction *I = dyn_cast_or_null(Result); |
5622 |
Instruction *I = dyn_cast_or_null(Result); |
| 5623 |
if (I && (Result != AddrMode.BaseReg)) |
5623 |
if (I && (Result != AddrMode.BaseReg)) |
| 5624 |
I->eraseFromParent(); |
5624 |
I->eraseFromParent(); |
| 5625 |
return Modified; |
5625 |
return Modified; |
| 5626 |
} |
5626 |
} |
| 5627 |
if (AddrMode.Scale != 1) |
5627 |
if (AddrMode.Scale != 1) |
| 5628 |
V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), |
5628 |
V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), |
| 5629 |
"sunkaddr"); |
5629 |
"sunkaddr"); |
| 5630 |
if (Result) |
5630 |
if (Result) |
| 5631 |
Result = Builder.CreateAdd(Result, V, "sunkaddr"); |
5631 |
Result = Builder.CreateAdd(Result, V, "sunkaddr"); |
| 5632 |
else |
5632 |
else |
| 5633 |
Result = V; |
5633 |
Result = V; |
| 5634 |
} |
5634 |
} |
| 5635 |
|
5635 |
|
| 5636 |
// Add in the BaseGV if present. |
5636 |
// Add in the BaseGV if present. |
| 5637 |
if (AddrMode.BaseGV) { |
5637 |
if (AddrMode.BaseGV) { |
| 5638 |
Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); |
5638 |
Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); |
| 5639 |
if (Result) |
5639 |
if (Result) |
| 5640 |
Result = Builder.CreateAdd(Result, V, "sunkaddr"); |
5640 |
Result = Builder.CreateAdd(Result, V, "sunkaddr"); |
| 5641 |
else |
5641 |
else |
| 5642 |
Result = V; |
5642 |
Result = V; |
| 5643 |
} |
5643 |
} |
| 5644 |
|
5644 |
|
| 5645 |
// Add in the Base Offset if present. |
5645 |
// Add in the Base Offset if present. |
| 5646 |
if (AddrMode.BaseOffs) { |
5646 |
if (AddrMode.BaseOffs) { |
| 5647 |
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); |
5647 |
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); |
| 5648 |
if (Result) |
5648 |
if (Result) |
| 5649 |
Result = Builder.CreateAdd(Result, V, "sunkaddr"); |
5649 |
Result = Builder.CreateAdd(Result, V, "sunkaddr"); |
| 5650 |
else |
5650 |
else |
| 5651 |
Result = V; |
5651 |
Result = V; |
| 5652 |
} |
5652 |
} |
| 5653 |
|
5653 |
|
| 5654 |
if (!Result) |
5654 |
if (!Result) |
| 5655 |
SunkAddr = Constant::getNullValue(Addr->getType()); |
5655 |
SunkAddr = Constant::getNullValue(Addr->getType()); |
| 5656 |
else |
5656 |
else |
| 5657 |
SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); |
5657 |
SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); |
| 5658 |
} |
5658 |
} |
| 5659 |
|
5659 |
|
| 5660 |
MemoryInst->replaceUsesOfWith(Repl, SunkAddr); |
5660 |
MemoryInst->replaceUsesOfWith(Repl, SunkAddr); |
| 5661 |
// Store the newly computed address into the cache. In the case we reused a |
5661 |
// Store the newly computed address into the cache. In the case we reused a |
| 5662 |
// value, this should be idempotent. |
5662 |
// value, this should be idempotent. |
| 5663 |
SunkAddrs[Addr] = WeakTrackingVH(SunkAddr); |
5663 |
SunkAddrs[Addr] = WeakTrackingVH(SunkAddr); |
| 5664 |
|
5664 |
|
| 5665 |
// If we have no uses, recursively delete the value and all dead instructions |
5665 |
// If we have no uses, recursively delete the value and all dead instructions |
| 5666 |
// using it. |
5666 |
// using it. |
| 5667 |
if (Repl->use_empty()) { |
5667 |
if (Repl->use_empty()) { |
| 5668 |
resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() { |
5668 |
resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() { |
| 5669 |
RecursivelyDeleteTriviallyDeadInstructions( |
5669 |
RecursivelyDeleteTriviallyDeadInstructions( |
| 5670 |
Repl, TLInfo, nullptr, |
5670 |
Repl, TLInfo, nullptr, |
| 5671 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
5671 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
| 5672 |
}); |
5672 |
}); |
| 5673 |
} |
5673 |
} |
| 5674 |
++NumMemoryInsts; |
5674 |
++NumMemoryInsts; |
| 5675 |
return true; |
5675 |
return true; |
| 5676 |
} |
5676 |
} |
| 5677 |
|
5677 |
|
| 5678 |
/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find |
5678 |
/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find |
| 5679 |
/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can |
5679 |
/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can |
| 5680 |
/// only handle a 2 operand GEP in the same basic block or a splat constant |
5680 |
/// only handle a 2 operand GEP in the same basic block or a splat constant |
| 5681 |
/// vector. The 2 operands to the GEP must have a scalar pointer and a vector |
5681 |
/// vector. The 2 operands to the GEP must have a scalar pointer and a vector |
| 5682 |
/// index. |
5682 |
/// index. |
| 5683 |
/// |
5683 |
/// |
| 5684 |
/// If the existing GEP has a vector base pointer that is splat, we can look |
5684 |
/// If the existing GEP has a vector base pointer that is splat, we can look |
| 5685 |
/// through the splat to find the scalar pointer. If we can't find a scalar |
5685 |
/// through the splat to find the scalar pointer. If we can't find a scalar |
| 5686 |
/// pointer there's nothing we can do. |
5686 |
/// pointer there's nothing we can do. |
| 5687 |
/// |
5687 |
/// |
| 5688 |
/// If we have a GEP with more than 2 indices where the middle indices are all |
5688 |
/// If we have a GEP with more than 2 indices where the middle indices are all |
| 5689 |
/// zeroes, we can replace it with 2 GEPs where the second has 2 operands. |
5689 |
/// zeroes, we can replace it with 2 GEPs where the second has 2 operands. |
| 5690 |
/// |
5690 |
/// |
| 5691 |
/// If the final index isn't a vector or is a splat, we can emit a scalar GEP |
5691 |
/// If the final index isn't a vector or is a splat, we can emit a scalar GEP |
| 5692 |
/// followed by a GEP with an all zeroes vector index. This will enable |
5692 |
/// followed by a GEP with an all zeroes vector index. This will enable |
| 5693 |
/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a |
5693 |
/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a |
| 5694 |
/// zero index. |
5694 |
/// zero index. |
| 5695 |
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, |
5695 |
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, |
| 5696 |
Value *Ptr) { |
5696 |
Value *Ptr) { |
| 5697 |
Value *NewAddr; |
5697 |
Value *NewAddr; |
| 5698 |
|
5698 |
|
| 5699 |
if (const auto *GEP = dyn_cast(Ptr)) { |
5699 |
if (const auto *GEP = dyn_cast(Ptr)) { |
| 5700 |
// Don't optimize GEPs that don't have indices. |
5700 |
// Don't optimize GEPs that don't have indices. |
| 5701 |
if (!GEP->hasIndices()) |
5701 |
if (!GEP->hasIndices()) |
| 5702 |
return false; |
5702 |
return false; |
| 5703 |
|
5703 |
|
| 5704 |
// If the GEP and the gather/scatter aren't in the same BB, don't optimize. |
5704 |
// If the GEP and the gather/scatter aren't in the same BB, don't optimize. |
| 5705 |
// FIXME: We should support this by sinking the GEP. |
5705 |
// FIXME: We should support this by sinking the GEP. |
| 5706 |
if (MemoryInst->getParent() != GEP->getParent()) |
5706 |
if (MemoryInst->getParent() != GEP->getParent()) |
| 5707 |
return false; |
5707 |
return false; |
| 5708 |
|
5708 |
|
| 5709 |
SmallVector Ops(GEP->operands()); |
5709 |
SmallVector Ops(GEP->operands()); |
| 5710 |
|
5710 |
|
| 5711 |
bool RewriteGEP = false; |
5711 |
bool RewriteGEP = false; |
| 5712 |
|
5712 |
|
| 5713 |
if (Ops[0]->getType()->isVectorTy()) { |
5713 |
if (Ops[0]->getType()->isVectorTy()) { |
| 5714 |
Ops[0] = getSplatValue(Ops[0]); |
5714 |
Ops[0] = getSplatValue(Ops[0]); |
| 5715 |
if (!Ops[0]) |
5715 |
if (!Ops[0]) |
| 5716 |
return false; |
5716 |
return false; |
| 5717 |
RewriteGEP = true; |
5717 |
RewriteGEP = true; |
| 5718 |
} |
5718 |
} |
| 5719 |
|
5719 |
|
| 5720 |
unsigned FinalIndex = Ops.size() - 1; |
5720 |
unsigned FinalIndex = Ops.size() - 1; |
| 5721 |
|
5721 |
|
| 5722 |
// Ensure all but the last index is 0. |
5722 |
// Ensure all but the last index is 0. |
| 5723 |
// FIXME: This isn't strictly required. All that's required is that they are |
5723 |
// FIXME: This isn't strictly required. All that's required is that they are |
| 5724 |
// all scalars or splats. |
5724 |
// all scalars or splats. |
| 5725 |
for (unsigned i = 1; i < FinalIndex; ++i) { |
5725 |
for (unsigned i = 1; i < FinalIndex; ++i) { |
| 5726 |
auto *C = dyn_cast(Ops[i]); |
5726 |
auto *C = dyn_cast(Ops[i]); |
| 5727 |
if (!C) |
5727 |
if (!C) |
| 5728 |
return false; |
5728 |
return false; |
| 5729 |
if (isa(C->getType())) |
5729 |
if (isa(C->getType())) |
| 5730 |
C = C->getSplatValue(); |
5730 |
C = C->getSplatValue(); |
| 5731 |
auto *CI = dyn_cast_or_null(C); |
5731 |
auto *CI = dyn_cast_or_null(C); |
| 5732 |
if (!CI || !CI->isZero()) |
5732 |
if (!CI || !CI->isZero()) |
| 5733 |
return false; |
5733 |
return false; |
| 5734 |
// Scalarize the index if needed. |
5734 |
// Scalarize the index if needed. |
| 5735 |
Ops[i] = CI; |
5735 |
Ops[i] = CI; |
| 5736 |
} |
5736 |
} |
| 5737 |
|
5737 |
|
| 5738 |
// Try to scalarize the final index. |
5738 |
// Try to scalarize the final index. |
| 5739 |
if (Ops[FinalIndex]->getType()->isVectorTy()) { |
5739 |
if (Ops[FinalIndex]->getType()->isVectorTy()) { |
| 5740 |
if (Value *V = getSplatValue(Ops[FinalIndex])) { |
5740 |
if (Value *V = getSplatValue(Ops[FinalIndex])) { |
| 5741 |
auto *C = dyn_cast(V); |
5741 |
auto *C = dyn_cast(V); |
| 5742 |
// Don't scalarize all zeros vector. |
5742 |
// Don't scalarize all zeros vector. |
| 5743 |
if (!C || !C->isZero()) { |
5743 |
if (!C || !C->isZero()) { |
| 5744 |
Ops[FinalIndex] = V; |
5744 |
Ops[FinalIndex] = V; |
| 5745 |
RewriteGEP = true; |
5745 |
RewriteGEP = true; |
| 5746 |
} |
5746 |
} |
| 5747 |
} |
5747 |
} |
| 5748 |
} |
5748 |
} |
| 5749 |
|
5749 |
|
| 5750 |
// If we made any changes or the we have extra operands, we need to generate |
5750 |
// If we made any changes or the we have extra operands, we need to generate |
| 5751 |
// new instructions. |
5751 |
// new instructions. |
| 5752 |
if (!RewriteGEP && Ops.size() == 2) |
5752 |
if (!RewriteGEP && Ops.size() == 2) |
| 5753 |
return false; |
5753 |
return false; |
| 5754 |
|
5754 |
|
| 5755 |
auto NumElts = cast(Ptr->getType())->getElementCount(); |
5755 |
auto NumElts = cast(Ptr->getType())->getElementCount(); |
| 5756 |
|
5756 |
|
| 5757 |
IRBuilder<> Builder(MemoryInst); |
5757 |
IRBuilder<> Builder(MemoryInst); |
| 5758 |
|
5758 |
|
| 5759 |
Type *SourceTy = GEP->getSourceElementType(); |
5759 |
Type *SourceTy = GEP->getSourceElementType(); |
| 5760 |
Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); |
5760 |
Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); |
| 5761 |
|
5761 |
|
| 5762 |
// If the final index isn't a vector, emit a scalar GEP containing all ops |
5762 |
// If the final index isn't a vector, emit a scalar GEP containing all ops |
| 5763 |
// and a vector GEP with all zeroes final index. |
5763 |
// and a vector GEP with all zeroes final index. |
| 5764 |
if (!Ops[FinalIndex]->getType()->isVectorTy()) { |
5764 |
if (!Ops[FinalIndex]->getType()->isVectorTy()) { |
| 5765 |
NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front()); |
5765 |
NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front()); |
| 5766 |
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); |
5766 |
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); |
| 5767 |
auto *SecondTy = GetElementPtrInst::getIndexedType( |
5767 |
auto *SecondTy = GetElementPtrInst::getIndexedType( |
| 5768 |
SourceTy, ArrayRef(Ops).drop_front()); |
5768 |
SourceTy, ArrayRef(Ops).drop_front()); |
| 5769 |
NewAddr = |
5769 |
NewAddr = |
| 5770 |
Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy)); |
5770 |
Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy)); |
| 5771 |
} else { |
5771 |
} else { |
| 5772 |
Value *Base = Ops[0]; |
5772 |
Value *Base = Ops[0]; |
| 5773 |
Value *Index = Ops[FinalIndex]; |
5773 |
Value *Index = Ops[FinalIndex]; |
| 5774 |
|
5774 |
|
| 5775 |
// Create a scalar GEP if there are more than 2 operands. |
5775 |
// Create a scalar GEP if there are more than 2 operands. |
| 5776 |
if (Ops.size() != 2) { |
5776 |
if (Ops.size() != 2) { |
| 5777 |
// Replace the last index with 0. |
5777 |
// Replace the last index with 0. |
| 5778 |
Ops[FinalIndex] = |
5778 |
Ops[FinalIndex] = |
| 5779 |
Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType()); |
5779 |
Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType()); |
| 5780 |
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front()); |
5780 |
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front()); |
| 5781 |
SourceTy = GetElementPtrInst::getIndexedType( |
5781 |
SourceTy = GetElementPtrInst::getIndexedType( |
| 5782 |
SourceTy, ArrayRef(Ops).drop_front()); |
5782 |
SourceTy, ArrayRef(Ops).drop_front()); |
| 5783 |
} |
5783 |
} |
| 5784 |
|
5784 |
|
| 5785 |
// Now create the GEP with scalar pointer and vector index. |
5785 |
// Now create the GEP with scalar pointer and vector index. |
| 5786 |
NewAddr = Builder.CreateGEP(SourceTy, Base, Index); |
5786 |
NewAddr = Builder.CreateGEP(SourceTy, Base, Index); |
| 5787 |
} |
5787 |
} |
| 5788 |
} else if (!isa(Ptr)) { |
5788 |
} else if (!isa(Ptr)) { |
| 5789 |
// Not a GEP, maybe its a splat and we can create a GEP to enable |
5789 |
// Not a GEP, maybe its a splat and we can create a GEP to enable |
| 5790 |
// SelectionDAGBuilder to use it as a uniform base. |
5790 |
// SelectionDAGBuilder to use it as a uniform base. |
| 5791 |
Value *V = getSplatValue(Ptr); |
5791 |
Value *V = getSplatValue(Ptr); |
| 5792 |
if (!V) |
5792 |
if (!V) |
| 5793 |
return false; |
5793 |
return false; |
| 5794 |
|
5794 |
|
| 5795 |
auto NumElts = cast(Ptr->getType())->getElementCount(); |
5795 |
auto NumElts = cast(Ptr->getType())->getElementCount(); |
| 5796 |
|
5796 |
|
| 5797 |
IRBuilder<> Builder(MemoryInst); |
5797 |
IRBuilder<> Builder(MemoryInst); |
| 5798 |
|
5798 |
|
| 5799 |
// Emit a vector GEP with a scalar pointer and all 0s vector index. |
5799 |
// Emit a vector GEP with a scalar pointer and all 0s vector index. |
| 5800 |
Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); |
5800 |
Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType()); |
| 5801 |
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); |
5801 |
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts); |
| 5802 |
Type *ScalarTy; |
5802 |
Type *ScalarTy; |
| 5803 |
if (cast(MemoryInst)->getIntrinsicID() == |
5803 |
if (cast(MemoryInst)->getIntrinsicID() == |
| 5804 |
Intrinsic::masked_gather) { |
5804 |
Intrinsic::masked_gather) { |
| 5805 |
ScalarTy = MemoryInst->getType()->getScalarType(); |
5805 |
ScalarTy = MemoryInst->getType()->getScalarType(); |
| 5806 |
} else { |
5806 |
} else { |
| 5807 |
assert(cast(MemoryInst)->getIntrinsicID() == |
5807 |
assert(cast(MemoryInst)->getIntrinsicID() == |
| 5808 |
Intrinsic::masked_scatter); |
5808 |
Intrinsic::masked_scatter); |
| 5809 |
ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType(); |
5809 |
ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType(); |
| 5810 |
} |
5810 |
} |
| 5811 |
NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy)); |
5811 |
NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy)); |
| 5812 |
} else { |
5812 |
} else { |
| 5813 |
// Constant, SelectionDAGBuilder knows to check if its a splat. |
5813 |
// Constant, SelectionDAGBuilder knows to check if its a splat. |
| 5814 |
return false; |
5814 |
return false; |
| 5815 |
} |
5815 |
} |
| 5816 |
|
5816 |
|
| 5817 |
MemoryInst->replaceUsesOfWith(Ptr, NewAddr); |
5817 |
MemoryInst->replaceUsesOfWith(Ptr, NewAddr); |
| 5818 |
|
5818 |
|
| 5819 |
// If we have no uses, recursively delete the value and all dead instructions |
5819 |
// If we have no uses, recursively delete the value and all dead instructions |
| 5820 |
// using it. |
5820 |
// using it. |
| 5821 |
if (Ptr->use_empty()) |
5821 |
if (Ptr->use_empty()) |
| 5822 |
RecursivelyDeleteTriviallyDeadInstructions( |
5822 |
RecursivelyDeleteTriviallyDeadInstructions( |
| 5823 |
Ptr, TLInfo, nullptr, |
5823 |
Ptr, TLInfo, nullptr, |
| 5824 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
5824 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
| 5825 |
|
5825 |
|
| 5826 |
return true; |
5826 |
return true; |
| 5827 |
} |
5827 |
} |
| 5828 |
|
5828 |
|
| 5829 |
/// If there are any memory operands, use OptimizeMemoryInst to sink their |
5829 |
/// If there are any memory operands, use OptimizeMemoryInst to sink their |
| 5830 |
/// address computing into the block when possible / profitable. |
5830 |
/// address computing into the block when possible / profitable. |
| 5831 |
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { |
5831 |
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { |
| 5832 |
bool MadeChange = false; |
5832 |
bool MadeChange = false; |
| 5833 |
|
5833 |
|
| 5834 |
const TargetRegisterInfo *TRI = |
5834 |
const TargetRegisterInfo *TRI = |
| 5835 |
TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); |
5835 |
TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); |
| 5836 |
TargetLowering::AsmOperandInfoVector TargetConstraints = |
5836 |
TargetLowering::AsmOperandInfoVector TargetConstraints = |
| 5837 |
TLI->ParseConstraints(*DL, TRI, *CS); |
5837 |
TLI->ParseConstraints(*DL, TRI, *CS); |
| 5838 |
unsigned ArgNo = 0; |
5838 |
unsigned ArgNo = 0; |
| 5839 |
for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { |
5839 |
for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) { |
| 5840 |
// Compute the constraint code and ConstraintType to use. |
5840 |
// Compute the constraint code and ConstraintType to use. |
| 5841 |
TLI->ComputeConstraintToUse(OpInfo, SDValue()); |
5841 |
TLI->ComputeConstraintToUse(OpInfo, SDValue()); |
| 5842 |
|
5842 |
|
| 5843 |
// TODO: Also handle C_Address? |
5843 |
// TODO: Also handle C_Address? |
| 5844 |
if (OpInfo.ConstraintType == TargetLowering::C_Memory && |
5844 |
if (OpInfo.ConstraintType == TargetLowering::C_Memory && |
| 5845 |
OpInfo.isIndirect) { |
5845 |
OpInfo.isIndirect) { |
| 5846 |
Value *OpVal = CS->getArgOperand(ArgNo++); |
5846 |
Value *OpVal = CS->getArgOperand(ArgNo++); |
| 5847 |
MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); |
5847 |
MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); |
| 5848 |
} else if (OpInfo.Type == InlineAsm::isInput) |
5848 |
} else if (OpInfo.Type == InlineAsm::isInput) |
| 5849 |
ArgNo++; |
5849 |
ArgNo++; |
| 5850 |
} |
5850 |
} |
| 5851 |
|
5851 |
|
| 5852 |
return MadeChange; |
5852 |
return MadeChange; |
| 5853 |
} |
5853 |
} |
| 5854 |
|
5854 |
|
| 5855 |
/// Check if all the uses of \p Val are equivalent (or free) zero or |
5855 |
/// Check if all the uses of \p Val are equivalent (or free) zero or |
| 5856 |
/// sign extensions. |
5856 |
/// sign extensions. |
| 5857 |
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { |
5857 |
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { |
| 5858 |
assert(!Val->use_empty() && "Input must have at least one use"); |
5858 |
assert(!Val->use_empty() && "Input must have at least one use"); |
| 5859 |
const Instruction *FirstUser = cast(*Val->user_begin()); |
5859 |
const Instruction *FirstUser = cast(*Val->user_begin()); |
| 5860 |
bool IsSExt = isa(FirstUser); |
5860 |
bool IsSExt = isa(FirstUser); |
| 5861 |
Type *ExtTy = FirstUser->getType(); |
5861 |
Type *ExtTy = FirstUser->getType(); |
| 5862 |
for (const User *U : Val->users()) { |
5862 |
for (const User *U : Val->users()) { |
| 5863 |
const Instruction *UI = cast(U); |
5863 |
const Instruction *UI = cast(U); |
| 5864 |
if ((IsSExt && !isa(UI)) || (!IsSExt && !isa(UI))) |
5864 |
if ((IsSExt && !isa(UI)) || (!IsSExt && !isa(UI))) |
| 5865 |
return false; |
5865 |
return false; |
| 5866 |
Type *CurTy = UI->getType(); |
5866 |
Type *CurTy = UI->getType(); |
| 5867 |
// Same input and output types: Same instruction after CSE. |
5867 |
// Same input and output types: Same instruction after CSE. |
| 5868 |
if (CurTy == ExtTy) |
5868 |
if (CurTy == ExtTy) |
| 5869 |
continue; |
5869 |
continue; |
| 5870 |
|
5870 |
|
| 5871 |
// If IsSExt is true, we are in this situation: |
5871 |
// If IsSExt is true, we are in this situation: |
| 5872 |
// a = Val |
5872 |
// a = Val |
| 5873 |
// b = sext ty1 a to ty2 |
5873 |
// b = sext ty1 a to ty2 |
| 5874 |
// c = sext ty1 a to ty3 |
5874 |
// c = sext ty1 a to ty3 |
| 5875 |
// Assuming ty2 is shorter than ty3, this could be turned into: |
5875 |
// Assuming ty2 is shorter than ty3, this could be turned into: |
| 5876 |
// a = Val |
5876 |
// a = Val |
| 5877 |
// b = sext ty1 a to ty2 |
5877 |
// b = sext ty1 a to ty2 |
| 5878 |
// c = sext ty2 b to ty3 |
5878 |
// c = sext ty2 b to ty3 |
| 5879 |
// However, the last sext is not free. |
5879 |
// However, the last sext is not free. |
| 5880 |
if (IsSExt) |
5880 |
if (IsSExt) |
| 5881 |
return false; |
5881 |
return false; |
| 5882 |
|
5882 |
|
| 5883 |
// This is a ZExt, maybe this is free to extend from one type to another. |
5883 |
// This is a ZExt, maybe this is free to extend from one type to another. |
| 5884 |
// In that case, we would not account for a different use. |
5884 |
// In that case, we would not account for a different use. |
| 5885 |
Type *NarrowTy; |
5885 |
Type *NarrowTy; |
| 5886 |
Type *LargeTy; |
5886 |
Type *LargeTy; |
| 5887 |
if (ExtTy->getScalarType()->getIntegerBitWidth() > |
5887 |
if (ExtTy->getScalarType()->getIntegerBitWidth() > |
| 5888 |
CurTy->getScalarType()->getIntegerBitWidth()) { |
5888 |
CurTy->getScalarType()->getIntegerBitWidth()) { |
| 5889 |
NarrowTy = CurTy; |
5889 |
NarrowTy = CurTy; |
| 5890 |
LargeTy = ExtTy; |
5890 |
LargeTy = ExtTy; |
| 5891 |
} else { |
5891 |
} else { |
| 5892 |
NarrowTy = ExtTy; |
5892 |
NarrowTy = ExtTy; |
| 5893 |
LargeTy = CurTy; |
5893 |
LargeTy = CurTy; |
| 5894 |
} |
5894 |
} |
| 5895 |
|
5895 |
|
| 5896 |
if (!TLI.isZExtFree(NarrowTy, LargeTy)) |
5896 |
if (!TLI.isZExtFree(NarrowTy, LargeTy)) |
| 5897 |
return false; |
5897 |
return false; |
| 5898 |
} |
5898 |
} |
| 5899 |
// All uses are the same or can be derived from one another for free. |
5899 |
// All uses are the same or can be derived from one another for free. |
| 5900 |
return true; |
5900 |
return true; |
| 5901 |
} |
5901 |
} |
| 5902 |
|
5902 |
|
| 5903 |
/// Try to speculatively promote extensions in \p Exts and continue |
5903 |
/// Try to speculatively promote extensions in \p Exts and continue |
| 5904 |
/// promoting through newly promoted operands recursively as far as doing so is |
5904 |
/// promoting through newly promoted operands recursively as far as doing so is |
| 5905 |
/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. |
5905 |
/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. |
| 5906 |
/// When some promotion happened, \p TPT contains the proper state to revert |
5906 |
/// When some promotion happened, \p TPT contains the proper state to revert |
| 5907 |
/// them. |
5907 |
/// them. |
| 5908 |
/// |
5908 |
/// |
| 5909 |
/// \return true if some promotion happened, false otherwise. |
5909 |
/// \return true if some promotion happened, false otherwise. |
| 5910 |
bool CodeGenPrepare::tryToPromoteExts( |
5910 |
bool CodeGenPrepare::tryToPromoteExts( |
| 5911 |
TypePromotionTransaction &TPT, const SmallVectorImpl &Exts, |
5911 |
TypePromotionTransaction &TPT, const SmallVectorImpl &Exts, |
| 5912 |
SmallVectorImpl &ProfitablyMovedExts, |
5912 |
SmallVectorImpl &ProfitablyMovedExts, |
| 5913 |
unsigned CreatedInstsCost) { |
5913 |
unsigned CreatedInstsCost) { |
| 5914 |
bool Promoted = false; |
5914 |
bool Promoted = false; |
| 5915 |
|
5915 |
|
| 5916 |
// Iterate over all the extensions to try to promote them. |
5916 |
// Iterate over all the extensions to try to promote them. |
| 5917 |
for (auto *I : Exts) { |
5917 |
for (auto *I : Exts) { |
| 5918 |
// Early check if we directly have ext(load). |
5918 |
// Early check if we directly have ext(load). |
| 5919 |
if (isa(I->getOperand(0))) { |
5919 |
if (isa(I->getOperand(0))) { |
| 5920 |
ProfitablyMovedExts.push_back(I); |
5920 |
ProfitablyMovedExts.push_back(I); |
| 5921 |
continue; |
5921 |
continue; |
| 5922 |
} |
5922 |
} |
| 5923 |
|
5923 |
|
| 5924 |
// Check whether or not we want to do any promotion. The reason we have |
5924 |
// Check whether or not we want to do any promotion. The reason we have |
| 5925 |
// this check inside the for loop is to catch the case where an extension |
5925 |
// this check inside the for loop is to catch the case where an extension |
| 5926 |
// is directly fed by a load because in such case the extension can be moved |
5926 |
// is directly fed by a load because in such case the extension can be moved |
| 5927 |
// up without any promotion on its operands. |
5927 |
// up without any promotion on its operands. |
| 5928 |
if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion) |
5928 |
if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion) |
| 5929 |
return false; |
5929 |
return false; |
| 5930 |
|
5930 |
|
| 5931 |
// Get the action to perform the promotion. |
5931 |
// Get the action to perform the promotion. |
| 5932 |
TypePromotionHelper::Action TPH = |
5932 |
TypePromotionHelper::Action TPH = |
| 5933 |
TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts); |
5933 |
TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts); |
| 5934 |
// Check if we can promote. |
5934 |
// Check if we can promote. |
| 5935 |
if (!TPH) { |
5935 |
if (!TPH) { |
| 5936 |
// Save the current extension as we cannot move up through its operand. |
5936 |
// Save the current extension as we cannot move up through its operand. |
| 5937 |
ProfitablyMovedExts.push_back(I); |
5937 |
ProfitablyMovedExts.push_back(I); |
| 5938 |
continue; |
5938 |
continue; |
| 5939 |
} |
5939 |
} |
| 5940 |
|
5940 |
|
| 5941 |
// Save the current state. |
5941 |
// Save the current state. |
| 5942 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
5942 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 5943 |
TPT.getRestorationPoint(); |
5943 |
TPT.getRestorationPoint(); |
| 5944 |
SmallVector NewExts; |
5944 |
SmallVector NewExts; |
| 5945 |
unsigned NewCreatedInstsCost = 0; |
5945 |
unsigned NewCreatedInstsCost = 0; |
| 5946 |
unsigned ExtCost = !TLI->isExtFree(I); |
5946 |
unsigned ExtCost = !TLI->isExtFree(I); |
| 5947 |
// Promote. |
5947 |
// Promote. |
| 5948 |
Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, |
5948 |
Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, |
| 5949 |
&NewExts, nullptr, *TLI); |
5949 |
&NewExts, nullptr, *TLI); |
| 5950 |
assert(PromotedVal && |
5950 |
assert(PromotedVal && |
| 5951 |
"TypePromotionHelper should have filtered out those cases"); |
5951 |
"TypePromotionHelper should have filtered out those cases"); |
| 5952 |
|
5952 |
|
| 5953 |
// We would be able to merge only one extension in a load. |
5953 |
// We would be able to merge only one extension in a load. |
| 5954 |
// Therefore, if we have more than 1 new extension we heuristically |
5954 |
// Therefore, if we have more than 1 new extension we heuristically |
| 5955 |
// cut this search path, because it means we degrade the code quality. |
5955 |
// cut this search path, because it means we degrade the code quality. |
| 5956 |
// With exactly 2, the transformation is neutral, because we will merge |
5956 |
// With exactly 2, the transformation is neutral, because we will merge |
| 5957 |
// one extension but leave one. However, we optimistically keep going, |
5957 |
// one extension but leave one. However, we optimistically keep going, |
| 5958 |
// because the new extension may be removed too. |
5958 |
// because the new extension may be removed too. |
| 5959 |
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; |
5959 |
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; |
| 5960 |
// FIXME: It would be possible to propagate a negative value instead of |
5960 |
// FIXME: It would be possible to propagate a negative value instead of |
| 5961 |
// conservatively ceiling it to 0. |
5961 |
// conservatively ceiling it to 0. |
| 5962 |
TotalCreatedInstsCost = |
5962 |
TotalCreatedInstsCost = |
| 5963 |
std::max((long long)0, (TotalCreatedInstsCost - ExtCost)); |
5963 |
std::max((long long)0, (TotalCreatedInstsCost - ExtCost)); |
| 5964 |
if (!StressExtLdPromotion && |
5964 |
if (!StressExtLdPromotion && |
| 5965 |
(TotalCreatedInstsCost > 1 || |
5965 |
(TotalCreatedInstsCost > 1 || |
| 5966 |
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { |
5966 |
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { |
| 5967 |
// This promotion is not profitable, rollback to the previous state, and |
5967 |
// This promotion is not profitable, rollback to the previous state, and |
| 5968 |
// save the current extension in ProfitablyMovedExts as the latest |
5968 |
// save the current extension in ProfitablyMovedExts as the latest |
| 5969 |
// speculative promotion turned out to be unprofitable. |
5969 |
// speculative promotion turned out to be unprofitable. |
| 5970 |
TPT.rollback(LastKnownGood); |
5970 |
TPT.rollback(LastKnownGood); |
| 5971 |
ProfitablyMovedExts.push_back(I); |
5971 |
ProfitablyMovedExts.push_back(I); |
| 5972 |
continue; |
5972 |
continue; |
| 5973 |
} |
5973 |
} |
| 5974 |
// Continue promoting NewExts as far as doing so is profitable. |
5974 |
// Continue promoting NewExts as far as doing so is profitable. |
| 5975 |
SmallVector NewlyMovedExts; |
5975 |
SmallVector NewlyMovedExts; |
| 5976 |
(void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost); |
5976 |
(void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost); |
| 5977 |
bool NewPromoted = false; |
5977 |
bool NewPromoted = false; |
| 5978 |
for (auto *ExtInst : NewlyMovedExts) { |
5978 |
for (auto *ExtInst : NewlyMovedExts) { |
| 5979 |
Instruction *MovedExt = cast(ExtInst); |
5979 |
Instruction *MovedExt = cast(ExtInst); |
| 5980 |
Value *ExtOperand = MovedExt->getOperand(0); |
5980 |
Value *ExtOperand = MovedExt->getOperand(0); |
| 5981 |
// If we have reached to a load, we need this extra profitability check |
5981 |
// If we have reached to a load, we need this extra profitability check |
| 5982 |
// as it could potentially be merged into an ext(load). |
5982 |
// as it could potentially be merged into an ext(load). |
| 5983 |
if (isa(ExtOperand) && |
5983 |
if (isa(ExtOperand) && |
| 5984 |
!(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || |
5984 |
!(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || |
| 5985 |
(ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI)))) |
5985 |
(ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI)))) |
| 5986 |
continue; |
5986 |
continue; |
| 5987 |
|
5987 |
|
| 5988 |
ProfitablyMovedExts.push_back(MovedExt); |
5988 |
ProfitablyMovedExts.push_back(MovedExt); |
| 5989 |
NewPromoted = true; |
5989 |
NewPromoted = true; |
| 5990 |
} |
5990 |
} |
| 5991 |
|
5991 |
|
| 5992 |
// If none of speculative promotions for NewExts is profitable, rollback |
5992 |
// If none of speculative promotions for NewExts is profitable, rollback |
| 5993 |
// and save the current extension (I) as the last profitable extension. |
5993 |
// and save the current extension (I) as the last profitable extension. |
| 5994 |
if (!NewPromoted) { |
5994 |
if (!NewPromoted) { |
| 5995 |
TPT.rollback(LastKnownGood); |
5995 |
TPT.rollback(LastKnownGood); |
| 5996 |
ProfitablyMovedExts.push_back(I); |
5996 |
ProfitablyMovedExts.push_back(I); |
| 5997 |
continue; |
5997 |
continue; |
| 5998 |
} |
5998 |
} |
| 5999 |
// The promotion is profitable. |
5999 |
// The promotion is profitable. |
| 6000 |
Promoted = true; |
6000 |
Promoted = true; |
| 6001 |
} |
6001 |
} |
| 6002 |
return Promoted; |
6002 |
return Promoted; |
| 6003 |
} |
6003 |
} |
| 6004 |
|
6004 |
|
| 6005 |
/// Merging redundant sexts when one is dominating the other. |
6005 |
/// Merging redundant sexts when one is dominating the other. |
| 6006 |
bool CodeGenPrepare::mergeSExts(Function &F) { |
6006 |
bool CodeGenPrepare::mergeSExts(Function &F) { |
| 6007 |
bool Changed = false; |
6007 |
bool Changed = false; |
| 6008 |
for (auto &Entry : ValToSExtendedUses) { |
6008 |
for (auto &Entry : ValToSExtendedUses) { |
| 6009 |
SExts &Insts = Entry.second; |
6009 |
SExts &Insts = Entry.second; |
| 6010 |
SExts CurPts; |
6010 |
SExts CurPts; |
| 6011 |
for (Instruction *Inst : Insts) { |
6011 |
for (Instruction *Inst : Insts) { |
| 6012 |
if (RemovedInsts.count(Inst) || !isa(Inst) || |
6012 |
if (RemovedInsts.count(Inst) || !isa(Inst) || |
| 6013 |
Inst->getOperand(0) != Entry.first) |
6013 |
Inst->getOperand(0) != Entry.first) |
| 6014 |
continue; |
6014 |
continue; |
| 6015 |
bool inserted = false; |
6015 |
bool inserted = false; |
| 6016 |
for (auto &Pt : CurPts) { |
6016 |
for (auto &Pt : CurPts) { |
| 6017 |
if (getDT(F).dominates(Inst, Pt)) { |
6017 |
if (getDT(F).dominates(Inst, Pt)) { |
| 6018 |
replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc); |
6018 |
replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc); |
| 6019 |
RemovedInsts.insert(Pt); |
6019 |
RemovedInsts.insert(Pt); |
| 6020 |
Pt->removeFromParent(); |
6020 |
Pt->removeFromParent(); |
| 6021 |
Pt = Inst; |
6021 |
Pt = Inst; |
| 6022 |
inserted = true; |
6022 |
inserted = true; |
| 6023 |
Changed = true; |
6023 |
Changed = true; |
| 6024 |
break; |
6024 |
break; |
| 6025 |
} |
6025 |
} |
| 6026 |
if (!getDT(F).dominates(Pt, Inst)) |
6026 |
if (!getDT(F).dominates(Pt, Inst)) |
| 6027 |
// Give up if we need to merge in a common dominator as the |
6027 |
// Give up if we need to merge in a common dominator as the |
| 6028 |
// experiments show it is not profitable. |
6028 |
// experiments show it is not profitable. |
| 6029 |
continue; |
6029 |
continue; |
| 6030 |
replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc); |
6030 |
replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc); |
| 6031 |
RemovedInsts.insert(Inst); |
6031 |
RemovedInsts.insert(Inst); |
| 6032 |
Inst->removeFromParent(); |
6032 |
Inst->removeFromParent(); |
| 6033 |
inserted = true; |
6033 |
inserted = true; |
| 6034 |
Changed = true; |
6034 |
Changed = true; |
| 6035 |
break; |
6035 |
break; |
| 6036 |
} |
6036 |
} |
| 6037 |
if (!inserted) |
6037 |
if (!inserted) |
| 6038 |
CurPts.push_back(Inst); |
6038 |
CurPts.push_back(Inst); |
| 6039 |
} |
6039 |
} |
| 6040 |
} |
6040 |
} |
| 6041 |
return Changed; |
6041 |
return Changed; |
| 6042 |
} |
6042 |
} |
| 6043 |
|
6043 |
|
| 6044 |
// Splitting large data structures so that the GEPs accessing them can have |
6044 |
// Splitting large data structures so that the GEPs accessing them can have |
| 6045 |
// smaller offsets so that they can be sunk to the same blocks as their users. |
6045 |
// smaller offsets so that they can be sunk to the same blocks as their users. |
| 6046 |
// For example, a large struct starting from %base is split into two parts |
6046 |
// For example, a large struct starting from %base is split into two parts |
| 6047 |
// where the second part starts from %new_base. |
6047 |
// where the second part starts from %new_base. |
| 6048 |
// |
6048 |
// |
| 6049 |
// Before: |
6049 |
// Before: |
| 6050 |
// BB0: |
6050 |
// BB0: |
| 6051 |
// %base = |
6051 |
// %base = |
| 6052 |
// |
6052 |
// |
| 6053 |
// BB1: |
6053 |
// BB1: |
| 6054 |
// %gep0 = gep %base, off0 |
6054 |
// %gep0 = gep %base, off0 |
| 6055 |
// %gep1 = gep %base, off1 |
6055 |
// %gep1 = gep %base, off1 |
| 6056 |
// %gep2 = gep %base, off2 |
6056 |
// %gep2 = gep %base, off2 |
| 6057 |
// |
6057 |
// |
| 6058 |
// BB2: |
6058 |
// BB2: |
| 6059 |
// %load1 = load %gep0 |
6059 |
// %load1 = load %gep0 |
| 6060 |
// %load2 = load %gep1 |
6060 |
// %load2 = load %gep1 |
| 6061 |
// %load3 = load %gep2 |
6061 |
// %load3 = load %gep2 |
| 6062 |
// |
6062 |
// |
| 6063 |
// After: |
6063 |
// After: |
| 6064 |
// BB0: |
6064 |
// BB0: |
| 6065 |
// %base = |
6065 |
// %base = |
| 6066 |
// %new_base = gep %base, off0 |
6066 |
// %new_base = gep %base, off0 |
| 6067 |
// |
6067 |
// |
| 6068 |
// BB1: |
6068 |
// BB1: |
| 6069 |
// %new_gep0 = %new_base |
6069 |
// %new_gep0 = %new_base |
| 6070 |
// %new_gep1 = gep %new_base, off1 - off0 |
6070 |
// %new_gep1 = gep %new_base, off1 - off0 |
| 6071 |
// %new_gep2 = gep %new_base, off2 - off0 |
6071 |
// %new_gep2 = gep %new_base, off2 - off0 |
| 6072 |
// |
6072 |
// |
| 6073 |
// BB2: |
6073 |
// BB2: |
| 6074 |
// %load1 = load i32, i32* %new_gep0 |
6074 |
// %load1 = load i32, i32* %new_gep0 |
| 6075 |
// %load2 = load i32, i32* %new_gep1 |
6075 |
// %load2 = load i32, i32* %new_gep1 |
| 6076 |
// %load3 = load i32, i32* %new_gep2 |
6076 |
// %load3 = load i32, i32* %new_gep2 |
| 6077 |
// |
6077 |
// |
| 6078 |
// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because |
6078 |
// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because |
| 6079 |
// their offsets are smaller enough to fit into the addressing mode. |
6079 |
// their offsets are smaller enough to fit into the addressing mode. |
| 6080 |
bool CodeGenPrepare::splitLargeGEPOffsets() { |
6080 |
bool CodeGenPrepare::splitLargeGEPOffsets() { |
| 6081 |
bool Changed = false; |
6081 |
bool Changed = false; |
| 6082 |
for (auto &Entry : LargeOffsetGEPMap) { |
6082 |
for (auto &Entry : LargeOffsetGEPMap) { |
| 6083 |
Value *OldBase = Entry.first; |
6083 |
Value *OldBase = Entry.first; |
| 6084 |
SmallVectorImpl, int64_t>> |
6084 |
SmallVectorImpl, int64_t>> |
| 6085 |
&LargeOffsetGEPs = Entry.second; |
6085 |
&LargeOffsetGEPs = Entry.second; |
| 6086 |
auto compareGEPOffset = |
6086 |
auto compareGEPOffset = |
| 6087 |
[&](const std::pair &LHS, |
6087 |
[&](const std::pair &LHS, |
| 6088 |
const std::pair &RHS) { |
6088 |
const std::pair &RHS) { |
| 6089 |
if (LHS.first == RHS.first) |
6089 |
if (LHS.first == RHS.first) |
| 6090 |
return false; |
6090 |
return false; |
| 6091 |
if (LHS.second != RHS.second) |
6091 |
if (LHS.second != RHS.second) |
| 6092 |
return LHS.second < RHS.second; |
6092 |
return LHS.second < RHS.second; |
| 6093 |
return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first]; |
6093 |
return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first]; |
| 6094 |
}; |
6094 |
}; |
| 6095 |
// Sorting all the GEPs of the same data structures based on the offsets. |
6095 |
// Sorting all the GEPs of the same data structures based on the offsets. |
| 6096 |
llvm::sort(LargeOffsetGEPs, compareGEPOffset); |
6096 |
llvm::sort(LargeOffsetGEPs, compareGEPOffset); |
| 6097 |
LargeOffsetGEPs.erase( |
6097 |
LargeOffsetGEPs.erase( |
| 6098 |
std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()), |
6098 |
std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()), |
| 6099 |
LargeOffsetGEPs.end()); |
6099 |
LargeOffsetGEPs.end()); |
| 6100 |
// Skip if all the GEPs have the same offsets. |
6100 |
// Skip if all the GEPs have the same offsets. |
| 6101 |
if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second) |
6101 |
if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second) |
| 6102 |
continue; |
6102 |
continue; |
| 6103 |
GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first; |
6103 |
GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first; |
| 6104 |
int64_t BaseOffset = LargeOffsetGEPs.begin()->second; |
6104 |
int64_t BaseOffset = LargeOffsetGEPs.begin()->second; |
| 6105 |
Value *NewBaseGEP = nullptr; |
6105 |
Value *NewBaseGEP = nullptr; |
| 6106 |
|
6106 |
|
| 6107 |
auto *LargeOffsetGEP = LargeOffsetGEPs.begin(); |
6107 |
auto *LargeOffsetGEP = LargeOffsetGEPs.begin(); |
| 6108 |
while (LargeOffsetGEP != LargeOffsetGEPs.end()) { |
6108 |
while (LargeOffsetGEP != LargeOffsetGEPs.end()) { |
| 6109 |
GetElementPtrInst *GEP = LargeOffsetGEP->first; |
6109 |
GetElementPtrInst *GEP = LargeOffsetGEP->first; |
| 6110 |
int64_t Offset = LargeOffsetGEP->second; |
6110 |
int64_t Offset = LargeOffsetGEP->second; |
| 6111 |
if (Offset != BaseOffset) { |
6111 |
if (Offset != BaseOffset) { |
| 6112 |
TargetLowering::AddrMode AddrMode; |
6112 |
TargetLowering::AddrMode AddrMode; |
| 6113 |
AddrMode.HasBaseReg = true; |
6113 |
AddrMode.HasBaseReg = true; |
| 6114 |
AddrMode.BaseOffs = Offset - BaseOffset; |
6114 |
AddrMode.BaseOffs = Offset - BaseOffset; |
| 6115 |
// The result type of the GEP might not be the type of the memory |
6115 |
// The result type of the GEP might not be the type of the memory |
| 6116 |
// access. |
6116 |
// access. |
| 6117 |
if (!TLI->isLegalAddressingMode(*DL, AddrMode, |
6117 |
if (!TLI->isLegalAddressingMode(*DL, AddrMode, |
| 6118 |
GEP->getResultElementType(), |
6118 |
GEP->getResultElementType(), |
| 6119 |
GEP->getAddressSpace())) { |
6119 |
GEP->getAddressSpace())) { |
| 6120 |
// We need to create a new base if the offset to the current base is |
6120 |
// We need to create a new base if the offset to the current base is |
| 6121 |
// too large to fit into the addressing mode. So, a very large struct |
6121 |
// too large to fit into the addressing mode. So, a very large struct |
| 6122 |
// may be split into several parts. |
6122 |
// may be split into several parts. |
| 6123 |
BaseGEP = GEP; |
6123 |
BaseGEP = GEP; |
| 6124 |
BaseOffset = Offset; |
6124 |
BaseOffset = Offset; |
| 6125 |
NewBaseGEP = nullptr; |
6125 |
NewBaseGEP = nullptr; |
| 6126 |
} |
6126 |
} |
| 6127 |
} |
6127 |
} |
| 6128 |
|
6128 |
|
| 6129 |
// Generate a new GEP to replace the current one. |
6129 |
// Generate a new GEP to replace the current one. |
| 6130 |
LLVMContext &Ctx = GEP->getContext(); |
6130 |
LLVMContext &Ctx = GEP->getContext(); |
| 6131 |
Type *PtrIdxTy = DL->getIndexType(GEP->getType()); |
6131 |
Type *PtrIdxTy = DL->getIndexType(GEP->getType()); |
| 6132 |
Type *I8PtrTy = |
6132 |
Type *I8PtrTy = |
| 6133 |
Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace()); |
6133 |
Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace()); |
| 6134 |
Type *I8Ty = Type::getInt8Ty(Ctx); |
6134 |
Type *I8Ty = Type::getInt8Ty(Ctx); |
| 6135 |
|
6135 |
|
| 6136 |
if (!NewBaseGEP) { |
6136 |
if (!NewBaseGEP) { |
| 6137 |
// Create a new base if we don't have one yet. Find the insertion |
6137 |
// Create a new base if we don't have one yet. Find the insertion |
| 6138 |
// pointer for the new base first. |
6138 |
// pointer for the new base first. |
| 6139 |
BasicBlock::iterator NewBaseInsertPt; |
6139 |
BasicBlock::iterator NewBaseInsertPt; |
| 6140 |
BasicBlock *NewBaseInsertBB; |
6140 |
BasicBlock *NewBaseInsertBB; |
| 6141 |
if (auto *BaseI = dyn_cast(OldBase)) { |
6141 |
if (auto *BaseI = dyn_cast(OldBase)) { |
| 6142 |
// If the base of the struct is an instruction, the new base will be |
6142 |
// If the base of the struct is an instruction, the new base will be |
| 6143 |
// inserted close to it. |
6143 |
// inserted close to it. |
| 6144 |
NewBaseInsertBB = BaseI->getParent(); |
6144 |
NewBaseInsertBB = BaseI->getParent(); |
| 6145 |
if (isa(BaseI)) |
6145 |
if (isa(BaseI)) |
| 6146 |
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); |
6146 |
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); |
| 6147 |
else if (InvokeInst *Invoke = dyn_cast(BaseI)) { |
6147 |
else if (InvokeInst *Invoke = dyn_cast(BaseI)) { |
| 6148 |
NewBaseInsertBB = |
6148 |
NewBaseInsertBB = |
| 6149 |
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI); |
6149 |
SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI); |
| 6150 |
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); |
6150 |
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); |
| 6151 |
} else |
6151 |
} else |
| 6152 |
NewBaseInsertPt = std::next(BaseI->getIterator()); |
6152 |
NewBaseInsertPt = std::next(BaseI->getIterator()); |
| 6153 |
} else { |
6153 |
} else { |
| 6154 |
// If the current base is an argument or global value, the new base |
6154 |
// If the current base is an argument or global value, the new base |
| 6155 |
// will be inserted to the entry block. |
6155 |
// will be inserted to the entry block. |
| 6156 |
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock(); |
6156 |
NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock(); |
| 6157 |
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); |
6157 |
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); |
| 6158 |
} |
6158 |
} |
| 6159 |
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); |
6159 |
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); |
| 6160 |
// Create a new base. |
6160 |
// Create a new base. |
| 6161 |
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset); |
6161 |
Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset); |
| 6162 |
NewBaseGEP = OldBase; |
6162 |
NewBaseGEP = OldBase; |
| 6163 |
if (NewBaseGEP->getType() != I8PtrTy) |
6163 |
if (NewBaseGEP->getType() != I8PtrTy) |
| 6164 |
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); |
6164 |
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); |
| 6165 |
NewBaseGEP = |
6165 |
NewBaseGEP = |
| 6166 |
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); |
6166 |
NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); |
| 6167 |
NewGEPBases.insert(NewBaseGEP); |
6167 |
NewGEPBases.insert(NewBaseGEP); |
| 6168 |
} |
6168 |
} |
| 6169 |
|
6169 |
|
| 6170 |
IRBuilder<> Builder(GEP); |
6170 |
IRBuilder<> Builder(GEP); |
| 6171 |
Value *NewGEP = NewBaseGEP; |
6171 |
Value *NewGEP = NewBaseGEP; |
| 6172 |
if (Offset == BaseOffset) { |
6172 |
if (Offset == BaseOffset) { |
| 6173 |
if (GEP->getType() != I8PtrTy) |
6173 |
if (GEP->getType() != I8PtrTy) |
| 6174 |
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); |
6174 |
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); |
| 6175 |
} else { |
6175 |
} else { |
| 6176 |
// Calculate the new offset for the new GEP. |
6176 |
// Calculate the new offset for the new GEP. |
| 6177 |
Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset); |
6177 |
Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset); |
| 6178 |
NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index); |
6178 |
NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index); |
| 6179 |
|
6179 |
|
| 6180 |
if (GEP->getType() != I8PtrTy) |
6180 |
if (GEP->getType() != I8PtrTy) |
| 6181 |
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); |
6181 |
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType()); |
| 6182 |
} |
6182 |
} |
| 6183 |
replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc); |
6183 |
replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc); |
| 6184 |
LargeOffsetGEPID.erase(GEP); |
6184 |
LargeOffsetGEPID.erase(GEP); |
| 6185 |
LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP); |
6185 |
LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP); |
| 6186 |
GEP->eraseFromParent(); |
6186 |
GEP->eraseFromParent(); |
| 6187 |
Changed = true; |
6187 |
Changed = true; |
| 6188 |
} |
6188 |
} |
| 6189 |
} |
6189 |
} |
| 6190 |
return Changed; |
6190 |
return Changed; |
| 6191 |
} |
6191 |
} |
| 6192 |
|
6192 |
|
| 6193 |
bool CodeGenPrepare::optimizePhiType( |
6193 |
bool CodeGenPrepare::optimizePhiType( |
| 6194 |
PHINode *I, SmallPtrSetImpl &Visited, |
6194 |
PHINode *I, SmallPtrSetImpl &Visited, |
| 6195 |
SmallPtrSetImpl &DeletedInstrs) { |
6195 |
SmallPtrSetImpl &DeletedInstrs) { |
| 6196 |
// We are looking for a collection on interconnected phi nodes that together |
6196 |
// We are looking for a collection on interconnected phi nodes that together |
| 6197 |
// only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts |
6197 |
// only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts |
| 6198 |
// are of the same type. Convert the whole set of nodes to the type of the |
6198 |
// are of the same type. Convert the whole set of nodes to the type of the |
| 6199 |
// bitcast. |
6199 |
// bitcast. |
| 6200 |
Type *PhiTy = I->getType(); |
6200 |
Type *PhiTy = I->getType(); |
| 6201 |
Type *ConvertTy = nullptr; |
6201 |
Type *ConvertTy = nullptr; |
| 6202 |
if (Visited.count(I) || |
6202 |
if (Visited.count(I) || |
| 6203 |
(!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy())) |
6203 |
(!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy())) |
| 6204 |
return false; |
6204 |
return false; |
| 6205 |
|
6205 |
|
| 6206 |
SmallVector Worklist; |
6206 |
SmallVector Worklist; |
| 6207 |
Worklist.push_back(cast(I)); |
6207 |
Worklist.push_back(cast(I)); |
| 6208 |
SmallPtrSet PhiNodes; |
6208 |
SmallPtrSet PhiNodes; |
| 6209 |
SmallPtrSet Constants; |
6209 |
SmallPtrSet Constants; |
| 6210 |
PhiNodes.insert(I); |
6210 |
PhiNodes.insert(I); |
| 6211 |
Visited.insert(I); |
6211 |
Visited.insert(I); |
| 6212 |
SmallPtrSet Defs; |
6212 |
SmallPtrSet Defs; |
| 6213 |
SmallPtrSet Uses; |
6213 |
SmallPtrSet Uses; |
| 6214 |
// This works by adding extra bitcasts between load/stores and removing |
6214 |
// This works by adding extra bitcasts between load/stores and removing |
| 6215 |
// existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)) |
6215 |
// existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)) |
| 6216 |
// we can get in the situation where we remove a bitcast in one iteration |
6216 |
// we can get in the situation where we remove a bitcast in one iteration |
| 6217 |
// just to add it again in the next. We need to ensure that at least one |
6217 |
// just to add it again in the next. We need to ensure that at least one |
| 6218 |
// bitcast we remove are anchored to something that will not change back. |
6218 |
// bitcast we remove are anchored to something that will not change back. |
| 6219 |
bool AnyAnchored = false; |
6219 |
bool AnyAnchored = false; |
| 6220 |
|
6220 |
|
| 6221 |
while (!Worklist.empty()) { |
6221 |
while (!Worklist.empty()) { |
| 6222 |
Instruction *II = Worklist.pop_back_val(); |
6222 |
Instruction *II = Worklist.pop_back_val(); |
| 6223 |
|
6223 |
|
| 6224 |
if (auto *Phi = dyn_cast(II)) { |
6224 |
if (auto *Phi = dyn_cast(II)) { |
| 6225 |
// Handle Defs, which might also be PHI's |
6225 |
// Handle Defs, which might also be PHI's |
| 6226 |
for (Value *V : Phi->incoming_values()) { |
6226 |
for (Value *V : Phi->incoming_values()) { |
| 6227 |
if (auto *OpPhi = dyn_cast(V)) { |
6227 |
if (auto *OpPhi = dyn_cast(V)) { |
| 6228 |
if (!PhiNodes.count(OpPhi)) { |
6228 |
if (!PhiNodes.count(OpPhi)) { |
| 6229 |
if (!Visited.insert(OpPhi).second) |
6229 |
if (!Visited.insert(OpPhi).second) |
| 6230 |
return false; |
6230 |
return false; |
| 6231 |
PhiNodes.insert(OpPhi); |
6231 |
PhiNodes.insert(OpPhi); |
| 6232 |
Worklist.push_back(OpPhi); |
6232 |
Worklist.push_back(OpPhi); |
| 6233 |
} |
6233 |
} |
| 6234 |
} else if (auto *OpLoad = dyn_cast(V)) { |
6234 |
} else if (auto *OpLoad = dyn_cast(V)) { |
| 6235 |
if (!OpLoad->isSimple()) |
6235 |
if (!OpLoad->isSimple()) |
| 6236 |
return false; |
6236 |
return false; |
| 6237 |
if (Defs.insert(OpLoad).second) |
6237 |
if (Defs.insert(OpLoad).second) |
| 6238 |
Worklist.push_back(OpLoad); |
6238 |
Worklist.push_back(OpLoad); |
| 6239 |
} else if (auto *OpEx = dyn_cast(V)) { |
6239 |
} else if (auto *OpEx = dyn_cast(V)) { |
| 6240 |
if (Defs.insert(OpEx).second) |
6240 |
if (Defs.insert(OpEx).second) |
| 6241 |
Worklist.push_back(OpEx); |
6241 |
Worklist.push_back(OpEx); |
| 6242 |
} else if (auto *OpBC = dyn_cast(V)) { |
6242 |
} else if (auto *OpBC = dyn_cast(V)) { |
| 6243 |
if (!ConvertTy) |
6243 |
if (!ConvertTy) |
| 6244 |
ConvertTy = OpBC->getOperand(0)->getType(); |
6244 |
ConvertTy = OpBC->getOperand(0)->getType(); |
| 6245 |
if (OpBC->getOperand(0)->getType() != ConvertTy) |
6245 |
if (OpBC->getOperand(0)->getType() != ConvertTy) |
| 6246 |
return false; |
6246 |
return false; |
| 6247 |
if (Defs.insert(OpBC).second) { |
6247 |
if (Defs.insert(OpBC).second) { |
| 6248 |
Worklist.push_back(OpBC); |
6248 |
Worklist.push_back(OpBC); |
| 6249 |
AnyAnchored |= !isa(OpBC->getOperand(0)) && |
6249 |
AnyAnchored |= !isa(OpBC->getOperand(0)) && |
| 6250 |
!isa(OpBC->getOperand(0)); |
6250 |
!isa(OpBC->getOperand(0)); |
| 6251 |
} |
6251 |
} |
| 6252 |
} else if (auto *OpC = dyn_cast(V)) |
6252 |
} else if (auto *OpC = dyn_cast(V)) |
| 6253 |
Constants.insert(OpC); |
6253 |
Constants.insert(OpC); |
| 6254 |
else |
6254 |
else |
| 6255 |
return false; |
6255 |
return false; |
| 6256 |
} |
6256 |
} |
| 6257 |
} |
6257 |
} |
| 6258 |
|
6258 |
|
| 6259 |
// Handle uses which might also be phi's |
6259 |
// Handle uses which might also be phi's |
| 6260 |
for (User *V : II->users()) { |
6260 |
for (User *V : II->users()) { |
| 6261 |
if (auto *OpPhi = dyn_cast(V)) { |
6261 |
if (auto *OpPhi = dyn_cast(V)) { |
| 6262 |
if (!PhiNodes.count(OpPhi)) { |
6262 |
if (!PhiNodes.count(OpPhi)) { |
| 6263 |
if (Visited.count(OpPhi)) |
6263 |
if (Visited.count(OpPhi)) |
| 6264 |
return false; |
6264 |
return false; |
| 6265 |
PhiNodes.insert(OpPhi); |
6265 |
PhiNodes.insert(OpPhi); |
| 6266 |
Visited.insert(OpPhi); |
6266 |
Visited.insert(OpPhi); |
| 6267 |
Worklist.push_back(OpPhi); |
6267 |
Worklist.push_back(OpPhi); |
| 6268 |
} |
6268 |
} |
| 6269 |
} else if (auto *OpStore = dyn_cast(V)) { |
6269 |
} else if (auto *OpStore = dyn_cast(V)) { |
| 6270 |
if (!OpStore->isSimple() || OpStore->getOperand(0) != II) |
6270 |
if (!OpStore->isSimple() || OpStore->getOperand(0) != II) |
| 6271 |
return false; |
6271 |
return false; |
| 6272 |
Uses.insert(OpStore); |
6272 |
Uses.insert(OpStore); |
| 6273 |
} else if (auto *OpBC = dyn_cast(V)) { |
6273 |
} else if (auto *OpBC = dyn_cast(V)) { |
| 6274 |
if (!ConvertTy) |
6274 |
if (!ConvertTy) |
| 6275 |
ConvertTy = OpBC->getType(); |
6275 |
ConvertTy = OpBC->getType(); |
| 6276 |
if (OpBC->getType() != ConvertTy) |
6276 |
if (OpBC->getType() != ConvertTy) |
| 6277 |
return false; |
6277 |
return false; |
| 6278 |
Uses.insert(OpBC); |
6278 |
Uses.insert(OpBC); |
| 6279 |
AnyAnchored |= |
6279 |
AnyAnchored |= |
| 6280 |
any_of(OpBC->users(), [](User *U) { return !isa(U); }); |
6280 |
any_of(OpBC->users(), [](User *U) { return !isa(U); }); |
| 6281 |
} else { |
6281 |
} else { |
| 6282 |
return false; |
6282 |
return false; |
| 6283 |
} |
6283 |
} |
| 6284 |
} |
6284 |
} |
| 6285 |
} |
6285 |
} |
| 6286 |
|
6286 |
|
| 6287 |
if (!ConvertTy || !AnyAnchored || |
6287 |
if (!ConvertTy || !AnyAnchored || |
| 6288 |
!TLI->shouldConvertPhiType(PhiTy, ConvertTy)) |
6288 |
!TLI->shouldConvertPhiType(PhiTy, ConvertTy)) |
| 6289 |
return false; |
6289 |
return false; |
| 6290 |
|
6290 |
|
| 6291 |
LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " |
6291 |
LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to " |
| 6292 |
<< *ConvertTy << "\n"); |
6292 |
<< *ConvertTy << "\n"); |
| 6293 |
|
6293 |
|
| 6294 |
// Create all the new phi nodes of the new type, and bitcast any loads to the |
6294 |
// Create all the new phi nodes of the new type, and bitcast any loads to the |
| 6295 |
// correct type. |
6295 |
// correct type. |
| 6296 |
ValueToValueMap ValMap; |
6296 |
ValueToValueMap ValMap; |
| 6297 |
for (ConstantData *C : Constants) |
6297 |
for (ConstantData *C : Constants) |
| 6298 |
ValMap[C] = ConstantExpr::getCast(Instruction::BitCast, C, ConvertTy); |
6298 |
ValMap[C] = ConstantExpr::getCast(Instruction::BitCast, C, ConvertTy); |
| 6299 |
for (Instruction *D : Defs) { |
6299 |
for (Instruction *D : Defs) { |
| 6300 |
if (isa(D)) { |
6300 |
if (isa(D)) { |
| 6301 |
ValMap[D] = D->getOperand(0); |
6301 |
ValMap[D] = D->getOperand(0); |
| 6302 |
DeletedInstrs.insert(D); |
6302 |
DeletedInstrs.insert(D); |
| 6303 |
} else { |
6303 |
} else { |
| 6304 |
ValMap[D] = |
6304 |
ValMap[D] = |
| 6305 |
new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode()); |
6305 |
new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode()); |
| 6306 |
} |
6306 |
} |
| 6307 |
} |
6307 |
} |
| 6308 |
for (PHINode *Phi : PhiNodes) |
6308 |
for (PHINode *Phi : PhiNodes) |
| 6309 |
ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(), |
6309 |
ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(), |
| 6310 |
Phi->getName() + ".tc", Phi); |
6310 |
Phi->getName() + ".tc", Phi); |
| 6311 |
// Pipe together all the PhiNodes. |
6311 |
// Pipe together all the PhiNodes. |
| 6312 |
for (PHINode *Phi : PhiNodes) { |
6312 |
for (PHINode *Phi : PhiNodes) { |
| 6313 |
PHINode *NewPhi = cast(ValMap[Phi]); |
6313 |
PHINode *NewPhi = cast(ValMap[Phi]); |
| 6314 |
for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++) |
6314 |
for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++) |
| 6315 |
NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)], |
6315 |
NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)], |
| 6316 |
Phi->getIncomingBlock(i)); |
6316 |
Phi->getIncomingBlock(i)); |
| 6317 |
Visited.insert(NewPhi); |
6317 |
Visited.insert(NewPhi); |
| 6318 |
} |
6318 |
} |
| 6319 |
// And finally pipe up the stores and bitcasts |
6319 |
// And finally pipe up the stores and bitcasts |
| 6320 |
for (Instruction *U : Uses) { |
6320 |
for (Instruction *U : Uses) { |
| 6321 |
if (isa(U)) { |
6321 |
if (isa(U)) { |
| 6322 |
DeletedInstrs.insert(U); |
6322 |
DeletedInstrs.insert(U); |
| 6323 |
replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc); |
6323 |
replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc); |
| 6324 |
} else { |
6324 |
} else { |
| 6325 |
U->setOperand(0, |
6325 |
U->setOperand(0, |
| 6326 |
new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); |
6326 |
new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U)); |
| 6327 |
} |
6327 |
} |
| 6328 |
} |
6328 |
} |
| 6329 |
|
6329 |
|
| 6330 |
// Save the removed phis to be deleted later. |
6330 |
// Save the removed phis to be deleted later. |
| 6331 |
for (PHINode *Phi : PhiNodes) |
6331 |
for (PHINode *Phi : PhiNodes) |
| 6332 |
DeletedInstrs.insert(Phi); |
6332 |
DeletedInstrs.insert(Phi); |
| 6333 |
return true; |
6333 |
return true; |
| 6334 |
} |
6334 |
} |
| 6335 |
|
6335 |
|
| 6336 |
bool CodeGenPrepare::optimizePhiTypes(Function &F) { |
6336 |
bool CodeGenPrepare::optimizePhiTypes(Function &F) { |
| 6337 |
if (!OptimizePhiTypes) |
6337 |
if (!OptimizePhiTypes) |
| 6338 |
return false; |
6338 |
return false; |
| 6339 |
|
6339 |
|
| 6340 |
bool Changed = false; |
6340 |
bool Changed = false; |
| 6341 |
SmallPtrSet Visited; |
6341 |
SmallPtrSet Visited; |
| 6342 |
SmallPtrSet DeletedInstrs; |
6342 |
SmallPtrSet DeletedInstrs; |
| 6343 |
|
6343 |
|
| 6344 |
// Attempt to optimize all the phis in the functions to the correct type. |
6344 |
// Attempt to optimize all the phis in the functions to the correct type. |
| 6345 |
for (auto &BB : F) |
6345 |
for (auto &BB : F) |
| 6346 |
for (auto &Phi : BB.phis()) |
6346 |
for (auto &Phi : BB.phis()) |
| 6347 |
Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs); |
6347 |
Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs); |
| 6348 |
|
6348 |
|
| 6349 |
// Remove any old phi's that have been converted. |
6349 |
// Remove any old phi's that have been converted. |
| 6350 |
for (auto *I : DeletedInstrs) { |
6350 |
for (auto *I : DeletedInstrs) { |
| 6351 |
replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc); |
6351 |
replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc); |
| 6352 |
I->eraseFromParent(); |
6352 |
I->eraseFromParent(); |
| 6353 |
} |
6353 |
} |
| 6354 |
|
6354 |
|
| 6355 |
return Changed; |
6355 |
return Changed; |
| 6356 |
} |
6356 |
} |
| 6357 |
|
6357 |
|
| 6358 |
/// Return true, if an ext(load) can be formed from an extension in |
6358 |
/// Return true, if an ext(load) can be formed from an extension in |
| 6359 |
/// \p MovedExts. |
6359 |
/// \p MovedExts. |
| 6360 |
bool CodeGenPrepare::canFormExtLd( |
6360 |
bool CodeGenPrepare::canFormExtLd( |
| 6361 |
const SmallVectorImpl &MovedExts, LoadInst *&LI, |
6361 |
const SmallVectorImpl &MovedExts, LoadInst *&LI, |
| 6362 |
Instruction *&Inst, bool HasPromoted) { |
6362 |
Instruction *&Inst, bool HasPromoted) { |
| 6363 |
for (auto *MovedExtInst : MovedExts) { |
6363 |
for (auto *MovedExtInst : MovedExts) { |
| 6364 |
if (isa(MovedExtInst->getOperand(0))) { |
6364 |
if (isa(MovedExtInst->getOperand(0))) { |
| 6365 |
LI = cast(MovedExtInst->getOperand(0)); |
6365 |
LI = cast(MovedExtInst->getOperand(0)); |
| 6366 |
Inst = MovedExtInst; |
6366 |
Inst = MovedExtInst; |
| 6367 |
break; |
6367 |
break; |
| 6368 |
} |
6368 |
} |
| 6369 |
} |
6369 |
} |
| 6370 |
if (!LI) |
6370 |
if (!LI) |
| 6371 |
return false; |
6371 |
return false; |
| 6372 |
|
6372 |
|
| 6373 |
// If they're already in the same block, there's nothing to do. |
6373 |
// If they're already in the same block, there's nothing to do. |
| 6374 |
// Make the cheap checks first if we did not promote. |
6374 |
// Make the cheap checks first if we did not promote. |
| 6375 |
// If we promoted, we need to check if it is indeed profitable. |
6375 |
// If we promoted, we need to check if it is indeed profitable. |
| 6376 |
if (!HasPromoted && LI->getParent() == Inst->getParent()) |
6376 |
if (!HasPromoted && LI->getParent() == Inst->getParent()) |
| 6377 |
return false; |
6377 |
return false; |
| 6378 |
|
6378 |
|
| 6379 |
return TLI->isExtLoad(LI, Inst, *DL); |
6379 |
return TLI->isExtLoad(LI, Inst, *DL); |
| 6380 |
} |
6380 |
} |
| 6381 |
|
6381 |
|
| 6382 |
/// Move a zext or sext fed by a load into the same basic block as the load, |
6382 |
/// Move a zext or sext fed by a load into the same basic block as the load, |
| 6383 |
/// unless conditions are unfavorable. This allows SelectionDAG to fold the |
6383 |
/// unless conditions are unfavorable. This allows SelectionDAG to fold the |
| 6384 |
/// extend into the load. |
6384 |
/// extend into the load. |
| 6385 |
/// |
6385 |
/// |
| 6386 |
/// E.g., |
6386 |
/// E.g., |
| 6387 |
/// \code |
6387 |
/// \code |
| 6388 |
/// %ld = load i32* %addr |
6388 |
/// %ld = load i32* %addr |
| 6389 |
/// %add = add nuw i32 %ld, 4 |
6389 |
/// %add = add nuw i32 %ld, 4 |
| 6390 |
/// %zext = zext i32 %add to i64 |
6390 |
/// %zext = zext i32 %add to i64 |
| 6391 |
// \endcode |
6391 |
// \endcode |
| 6392 |
/// => |
6392 |
/// => |
| 6393 |
/// \code |
6393 |
/// \code |
| 6394 |
/// %ld = load i32* %addr |
6394 |
/// %ld = load i32* %addr |
| 6395 |
/// %zext = zext i32 %ld to i64 |
6395 |
/// %zext = zext i32 %ld to i64 |
| 6396 |
/// %add = add nuw i64 %zext, 4 |
6396 |
/// %add = add nuw i64 %zext, 4 |
| 6397 |
/// \encode |
6397 |
/// \encode |
| 6398 |
/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which |
6398 |
/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which |
| 6399 |
/// allow us to match zext(load i32*) to i64. |
6399 |
/// allow us to match zext(load i32*) to i64. |
| 6400 |
/// |
6400 |
/// |
| 6401 |
/// Also, try to promote the computations used to obtain a sign extended |
6401 |
/// Also, try to promote the computations used to obtain a sign extended |
| 6402 |
/// value used into memory accesses. |
6402 |
/// value used into memory accesses. |
| 6403 |
/// E.g., |
6403 |
/// E.g., |
| 6404 |
/// \code |
6404 |
/// \code |
| 6405 |
/// a = add nsw i32 b, 3 |
6405 |
/// a = add nsw i32 b, 3 |
| 6406 |
/// d = sext i32 a to i64 |
6406 |
/// d = sext i32 a to i64 |
| 6407 |
/// e = getelementptr ..., i64 d |
6407 |
/// e = getelementptr ..., i64 d |
| 6408 |
/// \endcode |
6408 |
/// \endcode |
| 6409 |
/// => |
6409 |
/// => |
| 6410 |
/// \code |
6410 |
/// \code |
| 6411 |
/// f = sext i32 b to i64 |
6411 |
/// f = sext i32 b to i64 |
| 6412 |
/// a = add nsw i64 f, 3 |
6412 |
/// a = add nsw i64 f, 3 |
| 6413 |
/// e = getelementptr ..., i64 a |
6413 |
/// e = getelementptr ..., i64 a |
| 6414 |
/// \endcode |
6414 |
/// \endcode |
| 6415 |
/// |
6415 |
/// |
| 6416 |
/// \p Inst[in/out] the extension may be modified during the process if some |
6416 |
/// \p Inst[in/out] the extension may be modified during the process if some |
| 6417 |
/// promotions apply. |
6417 |
/// promotions apply. |
| 6418 |
bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { |
6418 |
bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { |
| 6419 |
bool AllowPromotionWithoutCommonHeader = false; |
6419 |
bool AllowPromotionWithoutCommonHeader = false; |
| 6420 |
/// See if it is an interesting sext operations for the address type |
6420 |
/// See if it is an interesting sext operations for the address type |
| 6421 |
/// promotion before trying to promote it, e.g., the ones with the right |
6421 |
/// promotion before trying to promote it, e.g., the ones with the right |
| 6422 |
/// type and used in memory accesses. |
6422 |
/// type and used in memory accesses. |
| 6423 |
bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion( |
6423 |
bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion( |
| 6424 |
*Inst, AllowPromotionWithoutCommonHeader); |
6424 |
*Inst, AllowPromotionWithoutCommonHeader); |
| 6425 |
TypePromotionTransaction TPT(RemovedInsts); |
6425 |
TypePromotionTransaction TPT(RemovedInsts); |
| 6426 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
6426 |
TypePromotionTransaction::ConstRestorationPt LastKnownGood = |
| 6427 |
TPT.getRestorationPoint(); |
6427 |
TPT.getRestorationPoint(); |
| 6428 |
SmallVector Exts; |
6428 |
SmallVector Exts; |
| 6429 |
SmallVector SpeculativelyMovedExts; |
6429 |
SmallVector SpeculativelyMovedExts; |
| 6430 |
Exts.push_back(Inst); |
6430 |
Exts.push_back(Inst); |
| 6431 |
|
6431 |
|
| 6432 |
bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts); |
6432 |
bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts); |
| 6433 |
|
6433 |
|
| 6434 |
// Look for a load being extended. |
6434 |
// Look for a load being extended. |
| 6435 |
LoadInst *LI = nullptr; |
6435 |
LoadInst *LI = nullptr; |
| 6436 |
Instruction *ExtFedByLoad; |
6436 |
Instruction *ExtFedByLoad; |
| 6437 |
|
6437 |
|
| 6438 |
// Try to promote a chain of computation if it allows to form an extended |
6438 |
// Try to promote a chain of computation if it allows to form an extended |
| 6439 |
// load. |
6439 |
// load. |
| 6440 |
if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) { |
6440 |
if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) { |
| 6441 |
assert(LI && ExtFedByLoad && "Expect a valid load and extension"); |
6441 |
assert(LI && ExtFedByLoad && "Expect a valid load and extension"); |
| 6442 |
TPT.commit(); |
6442 |
TPT.commit(); |
| 6443 |
// Move the extend into the same block as the load. |
6443 |
// Move the extend into the same block as the load. |
| 6444 |
ExtFedByLoad->moveAfter(LI); |
6444 |
ExtFedByLoad->moveAfter(LI); |
| 6445 |
++NumExtsMoved; |
6445 |
++NumExtsMoved; |
| 6446 |
Inst = ExtFedByLoad; |
6446 |
Inst = ExtFedByLoad; |
| 6447 |
return true; |
6447 |
return true; |
| 6448 |
} |
6448 |
} |
| 6449 |
|
6449 |
|
| 6450 |
// Continue promoting SExts if known as considerable depending on targets. |
6450 |
// Continue promoting SExts if known as considerable depending on targets. |
| 6451 |
if (ATPConsiderable && |
6451 |
if (ATPConsiderable && |
| 6452 |
performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader, |
6452 |
performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader, |
| 6453 |
HasPromoted, TPT, SpeculativelyMovedExts)) |
6453 |
HasPromoted, TPT, SpeculativelyMovedExts)) |
| 6454 |
return true; |
6454 |
return true; |
| 6455 |
|
6455 |
|
| 6456 |
TPT.rollback(LastKnownGood); |
6456 |
TPT.rollback(LastKnownGood); |
| 6457 |
return false; |
6457 |
return false; |
| 6458 |
} |
6458 |
} |
| 6459 |
|
6459 |
|
| 6460 |
// Perform address type promotion if doing so is profitable. |
6460 |
// Perform address type promotion if doing so is profitable. |
| 6461 |
// If AllowPromotionWithoutCommonHeader == false, we should find other sext |
6461 |
// If AllowPromotionWithoutCommonHeader == false, we should find other sext |
| 6462 |
// instructions that sign extended the same initial value. However, if |
6462 |
// instructions that sign extended the same initial value. However, if |
| 6463 |
// AllowPromotionWithoutCommonHeader == true, we expect promoting the |
6463 |
// AllowPromotionWithoutCommonHeader == true, we expect promoting the |
| 6464 |
// extension is just profitable. |
6464 |
// extension is just profitable. |
| 6465 |
bool CodeGenPrepare::performAddressTypePromotion( |
6465 |
bool CodeGenPrepare::performAddressTypePromotion( |
| 6466 |
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, |
6466 |
Instruction *&Inst, bool AllowPromotionWithoutCommonHeader, |
| 6467 |
bool HasPromoted, TypePromotionTransaction &TPT, |
6467 |
bool HasPromoted, TypePromotionTransaction &TPT, |
| 6468 |
SmallVectorImpl &SpeculativelyMovedExts) { |
6468 |
SmallVectorImpl &SpeculativelyMovedExts) { |
| 6469 |
bool Promoted = false; |
6469 |
bool Promoted = false; |
| 6470 |
SmallPtrSet UnhandledExts; |
6470 |
SmallPtrSet UnhandledExts; |
| 6471 |
bool AllSeenFirst = true; |
6471 |
bool AllSeenFirst = true; |
| 6472 |
for (auto *I : SpeculativelyMovedExts) { |
6472 |
for (auto *I : SpeculativelyMovedExts) { |
| 6473 |
Value *HeadOfChain = I->getOperand(0); |
6473 |
Value *HeadOfChain = I->getOperand(0); |
| 6474 |
DenseMap::iterator AlreadySeen = |
6474 |
DenseMap::iterator AlreadySeen = |
| 6475 |
SeenChainsForSExt.find(HeadOfChain); |
6475 |
SeenChainsForSExt.find(HeadOfChain); |
| 6476 |
// If there is an unhandled SExt which has the same header, try to promote |
6476 |
// If there is an unhandled SExt which has the same header, try to promote |
| 6477 |
// it as well. |
6477 |
// it as well. |
| 6478 |
if (AlreadySeen != SeenChainsForSExt.end()) { |
6478 |
if (AlreadySeen != SeenChainsForSExt.end()) { |
| 6479 |
if (AlreadySeen->second != nullptr) |
6479 |
if (AlreadySeen->second != nullptr) |
| 6480 |
UnhandledExts.insert(AlreadySeen->second); |
6480 |
UnhandledExts.insert(AlreadySeen->second); |
| 6481 |
AllSeenFirst = false; |
6481 |
AllSeenFirst = false; |
| 6482 |
} |
6482 |
} |
| 6483 |
} |
6483 |
} |
| 6484 |
|
6484 |
|
| 6485 |
if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader && |
6485 |
if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader && |
| 6486 |
SpeculativelyMovedExts.size() == 1)) { |
6486 |
SpeculativelyMovedExts.size() == 1)) { |
| 6487 |
TPT.commit(); |
6487 |
TPT.commit(); |
| 6488 |
if (HasPromoted) |
6488 |
if (HasPromoted) |
| 6489 |
Promoted = true; |
6489 |
Promoted = true; |
| 6490 |
for (auto *I : SpeculativelyMovedExts) { |
6490 |
for (auto *I : SpeculativelyMovedExts) { |
| 6491 |
Value *HeadOfChain = I->getOperand(0); |
6491 |
Value *HeadOfChain = I->getOperand(0); |
| 6492 |
SeenChainsForSExt[HeadOfChain] = nullptr; |
6492 |
SeenChainsForSExt[HeadOfChain] = nullptr; |
| 6493 |
ValToSExtendedUses[HeadOfChain].push_back(I); |
6493 |
ValToSExtendedUses[HeadOfChain].push_back(I); |
| 6494 |
} |
6494 |
} |
| 6495 |
// Update Inst as promotion happen. |
6495 |
// Update Inst as promotion happen. |
| 6496 |
Inst = SpeculativelyMovedExts.pop_back_val(); |
6496 |
Inst = SpeculativelyMovedExts.pop_back_val(); |
| 6497 |
} else { |
6497 |
} else { |
| 6498 |
// This is the first chain visited from the header, keep the current chain |
6498 |
// This is the first chain visited from the header, keep the current chain |
| 6499 |
// as unhandled. Defer to promote this until we encounter another SExt |
6499 |
// as unhandled. Defer to promote this until we encounter another SExt |
| 6500 |
// chain derived from the same header. |
6500 |
// chain derived from the same header. |
| 6501 |
for (auto *I : SpeculativelyMovedExts) { |
6501 |
for (auto *I : SpeculativelyMovedExts) { |
| 6502 |
Value *HeadOfChain = I->getOperand(0); |
6502 |
Value *HeadOfChain = I->getOperand(0); |
| 6503 |
SeenChainsForSExt[HeadOfChain] = Inst; |
6503 |
SeenChainsForSExt[HeadOfChain] = Inst; |
| 6504 |
} |
6504 |
} |
| 6505 |
return false; |
6505 |
return false; |
| 6506 |
} |
6506 |
} |
| 6507 |
|
6507 |
|
| 6508 |
if (!AllSeenFirst && !UnhandledExts.empty()) |
6508 |
if (!AllSeenFirst && !UnhandledExts.empty()) |
| 6509 |
for (auto *VisitedSExt : UnhandledExts) { |
6509 |
for (auto *VisitedSExt : UnhandledExts) { |
| 6510 |
if (RemovedInsts.count(VisitedSExt)) |
6510 |
if (RemovedInsts.count(VisitedSExt)) |
| 6511 |
continue; |
6511 |
continue; |
| 6512 |
TypePromotionTransaction TPT(RemovedInsts); |
6512 |
TypePromotionTransaction TPT(RemovedInsts); |
| 6513 |
SmallVector Exts; |
6513 |
SmallVector Exts; |
| 6514 |
SmallVector Chains; |
6514 |
SmallVector Chains; |
| 6515 |
Exts.push_back(VisitedSExt); |
6515 |
Exts.push_back(VisitedSExt); |
| 6516 |
bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains); |
6516 |
bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains); |
| 6517 |
TPT.commit(); |
6517 |
TPT.commit(); |
| 6518 |
if (HasPromoted) |
6518 |
if (HasPromoted) |
| 6519 |
Promoted = true; |
6519 |
Promoted = true; |
| 6520 |
for (auto *I : Chains) { |
6520 |
for (auto *I : Chains) { |
| 6521 |
Value *HeadOfChain = I->getOperand(0); |
6521 |
Value *HeadOfChain = I->getOperand(0); |
| 6522 |
// Mark this as handled. |
6522 |
// Mark this as handled. |
| 6523 |
SeenChainsForSExt[HeadOfChain] = nullptr; |
6523 |
SeenChainsForSExt[HeadOfChain] = nullptr; |
| 6524 |
ValToSExtendedUses[HeadOfChain].push_back(I); |
6524 |
ValToSExtendedUses[HeadOfChain].push_back(I); |
| 6525 |
} |
6525 |
} |
| 6526 |
} |
6526 |
} |
| 6527 |
return Promoted; |
6527 |
return Promoted; |
| 6528 |
} |
6528 |
} |
| 6529 |
|
6529 |
|
| 6530 |
bool CodeGenPrepare::optimizeExtUses(Instruction *I) { |
6530 |
bool CodeGenPrepare::optimizeExtUses(Instruction *I) { |
| 6531 |
BasicBlock *DefBB = I->getParent(); |
6531 |
BasicBlock *DefBB = I->getParent(); |
| 6532 |
|
6532 |
|
| 6533 |
// If the result of a {s|z}ext and its source are both live out, rewrite all |
6533 |
// If the result of a {s|z}ext and its source are both live out, rewrite all |
| 6534 |
// other uses of the source with result of extension. |
6534 |
// other uses of the source with result of extension. |
| 6535 |
Value *Src = I->getOperand(0); |
6535 |
Value *Src = I->getOperand(0); |
| 6536 |
if (Src->hasOneUse()) |
6536 |
if (Src->hasOneUse()) |
| 6537 |
return false; |
6537 |
return false; |
| 6538 |
|
6538 |
|
| 6539 |
// Only do this xform if truncating is free. |
6539 |
// Only do this xform if truncating is free. |
| 6540 |
if (!TLI->isTruncateFree(I->getType(), Src->getType())) |
6540 |
if (!TLI->isTruncateFree(I->getType(), Src->getType())) |
| 6541 |
return false; |
6541 |
return false; |
| 6542 |
|
6542 |
|
| 6543 |
// Only safe to perform the optimization if the source is also defined in |
6543 |
// Only safe to perform the optimization if the source is also defined in |
| 6544 |
// this block. |
6544 |
// this block. |
| 6545 |
if (!isa(Src) || DefBB != cast(Src)->getParent()) |
6545 |
if (!isa(Src) || DefBB != cast(Src)->getParent()) |
| 6546 |
return false; |
6546 |
return false; |
| 6547 |
|
6547 |
|
| 6548 |
bool DefIsLiveOut = false; |
6548 |
bool DefIsLiveOut = false; |
| 6549 |
for (User *U : I->users()) { |
6549 |
for (User *U : I->users()) { |
| 6550 |
Instruction *UI = cast(U); |
6550 |
Instruction *UI = cast(U); |
| 6551 |
|
6551 |
|
| 6552 |
// Figure out which BB this ext is used in. |
6552 |
// Figure out which BB this ext is used in. |
| 6553 |
BasicBlock *UserBB = UI->getParent(); |
6553 |
BasicBlock *UserBB = UI->getParent(); |
| 6554 |
if (UserBB == DefBB) |
6554 |
if (UserBB == DefBB) |
| 6555 |
continue; |
6555 |
continue; |
| 6556 |
DefIsLiveOut = true; |
6556 |
DefIsLiveOut = true; |
| 6557 |
break; |
6557 |
break; |
| 6558 |
} |
6558 |
} |
| 6559 |
if (!DefIsLiveOut) |
6559 |
if (!DefIsLiveOut) |
| 6560 |
return false; |
6560 |
return false; |
| 6561 |
|
6561 |
|
| 6562 |
// Make sure none of the uses are PHI nodes. |
6562 |
// Make sure none of the uses are PHI nodes. |
| 6563 |
for (User *U : Src->users()) { |
6563 |
for (User *U : Src->users()) { |
| 6564 |
Instruction *UI = cast(U); |
6564 |
Instruction *UI = cast(U); |
| 6565 |
BasicBlock *UserBB = UI->getParent(); |
6565 |
BasicBlock *UserBB = UI->getParent(); |
| 6566 |
if (UserBB == DefBB) |
6566 |
if (UserBB == DefBB) |
| 6567 |
continue; |
6567 |
continue; |
| 6568 |
// Be conservative. We don't want this xform to end up introducing |
6568 |
// Be conservative. We don't want this xform to end up introducing |
| 6569 |
// reloads just before load / store instructions. |
6569 |
// reloads just before load / store instructions. |
| 6570 |
if (isa(UI) || isa(UI) || isa(UI)) |
6570 |
if (isa(UI) || isa(UI) || isa(UI)) |
| 6571 |
return false; |
6571 |
return false; |
| 6572 |
} |
6572 |
} |
| 6573 |
|
6573 |
|
| 6574 |
// InsertedTruncs - Only insert one trunc in each block once. |
6574 |
// InsertedTruncs - Only insert one trunc in each block once. |
| 6575 |
DenseMap InsertedTruncs; |
6575 |
DenseMap InsertedTruncs; |
| 6576 |
|
6576 |
|
| 6577 |
bool MadeChange = false; |
6577 |
bool MadeChange = false; |
| 6578 |
for (Use &U : Src->uses()) { |
6578 |
for (Use &U : Src->uses()) { |
| 6579 |
Instruction *User = cast(U.getUser()); |
6579 |
Instruction *User = cast(U.getUser()); |
| 6580 |
|
6580 |
|
| 6581 |
// Figure out which BB this ext is used in. |
6581 |
// Figure out which BB this ext is used in. |
| 6582 |
BasicBlock *UserBB = User->getParent(); |
6582 |
BasicBlock *UserBB = User->getParent(); |
| 6583 |
if (UserBB == DefBB) |
6583 |
if (UserBB == DefBB) |
| 6584 |
continue; |
6584 |
continue; |
| 6585 |
|
6585 |
|
| 6586 |
// Both src and def are live in this block. Rewrite the use. |
6586 |
// Both src and def are live in this block. Rewrite the use. |
| 6587 |
Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; |
6587 |
Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; |
| 6588 |
|
6588 |
|
| 6589 |
if (!InsertedTrunc) { |
6589 |
if (!InsertedTrunc) { |
| 6590 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
6590 |
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); |
| 6591 |
assert(InsertPt != UserBB->end()); |
6591 |
assert(InsertPt != UserBB->end()); |
| 6592 |
InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); |
6592 |
InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); |
| 6593 |
InsertedInsts.insert(InsertedTrunc); |
6593 |
InsertedInsts.insert(InsertedTrunc); |
| 6594 |
} |
6594 |
} |
| 6595 |
|
6595 |
|
| 6596 |
// Replace a use of the {s|z}ext source with a use of the result. |
6596 |
// Replace a use of the {s|z}ext source with a use of the result. |
| 6597 |
U = InsertedTrunc; |
6597 |
U = InsertedTrunc; |
| 6598 |
++NumExtUses; |
6598 |
++NumExtUses; |
| 6599 |
MadeChange = true; |
6599 |
MadeChange = true; |
| 6600 |
} |
6600 |
} |
| 6601 |
|
6601 |
|
| 6602 |
return MadeChange; |
6602 |
return MadeChange; |
| 6603 |
} |
6603 |
} |
| 6604 |
|
6604 |
|
| 6605 |
// Find loads whose uses only use some of the loaded value's bits. Add an "and" |
6605 |
// Find loads whose uses only use some of the loaded value's bits. Add an "and" |
| 6606 |
// just after the load if the target can fold this into one extload instruction, |
6606 |
// just after the load if the target can fold this into one extload instruction, |
| 6607 |
// with the hope of eliminating some of the other later "and" instructions using |
6607 |
// with the hope of eliminating some of the other later "and" instructions using |
| 6608 |
// the loaded value. "and"s that are made trivially redundant by the insertion |
6608 |
// the loaded value. "and"s that are made trivially redundant by the insertion |
| 6609 |
// of the new "and" are removed by this function, while others (e.g. those whose |
6609 |
// of the new "and" are removed by this function, while others (e.g. those whose |
| 6610 |
// path from the load goes through a phi) are left for isel to potentially |
6610 |
// path from the load goes through a phi) are left for isel to potentially |
| 6611 |
// remove. |
6611 |
// remove. |
| 6612 |
// |
6612 |
// |
| 6613 |
// For example: |
6613 |
// For example: |
| 6614 |
// |
6614 |
// |
| 6615 |
// b0: |
6615 |
// b0: |
| 6616 |
// x = load i32 |
6616 |
// x = load i32 |
| 6617 |
// ... |
6617 |
// ... |
| 6618 |
// b1: |
6618 |
// b1: |
| 6619 |
// y = and x, 0xff |
6619 |
// y = and x, 0xff |
| 6620 |
// z = use y |
6620 |
// z = use y |
| 6621 |
// |
6621 |
// |
| 6622 |
// becomes: |
6622 |
// becomes: |
| 6623 |
// |
6623 |
// |
| 6624 |
// b0: |
6624 |
// b0: |
| 6625 |
// x = load i32 |
6625 |
// x = load i32 |
| 6626 |
// x' = and x, 0xff |
6626 |
// x' = and x, 0xff |
| 6627 |
// ... |
6627 |
// ... |
| 6628 |
// b1: |
6628 |
// b1: |
| 6629 |
// z = use x' |
6629 |
// z = use x' |
| 6630 |
// |
6630 |
// |
| 6631 |
// whereas: |
6631 |
// whereas: |
| 6632 |
// |
6632 |
// |
| 6633 |
// b0: |
6633 |
// b0: |
| 6634 |
// x1 = load i32 |
6634 |
// x1 = load i32 |
| 6635 |
// ... |
6635 |
// ... |
| 6636 |
// b1: |
6636 |
// b1: |
| 6637 |
// x2 = load i32 |
6637 |
// x2 = load i32 |
| 6638 |
// ... |
6638 |
// ... |
| 6639 |
// b2: |
6639 |
// b2: |
| 6640 |
// x = phi x1, x2 |
6640 |
// x = phi x1, x2 |
| 6641 |
// y = and x, 0xff |
6641 |
// y = and x, 0xff |
| 6642 |
// |
6642 |
// |
| 6643 |
// becomes (after a call to optimizeLoadExt for each load): |
6643 |
// becomes (after a call to optimizeLoadExt for each load): |
| 6644 |
// |
6644 |
// |
| 6645 |
// b0: |
6645 |
// b0: |
| 6646 |
// x1 = load i32 |
6646 |
// x1 = load i32 |
| 6647 |
// x1' = and x1, 0xff |
6647 |
// x1' = and x1, 0xff |
| 6648 |
// ... |
6648 |
// ... |
| 6649 |
// b1: |
6649 |
// b1: |
| 6650 |
// x2 = load i32 |
6650 |
// x2 = load i32 |
| 6651 |
// x2' = and x2, 0xff |
6651 |
// x2' = and x2, 0xff |
| 6652 |
// ... |
6652 |
// ... |
| 6653 |
// b2: |
6653 |
// b2: |
| 6654 |
// x = phi x1', x2' |
6654 |
// x = phi x1', x2' |
| 6655 |
// y = and x, 0xff |
6655 |
// y = and x, 0xff |
| 6656 |
bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { |
6656 |
bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { |
| 6657 |
if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy()) |
6657 |
if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy()) |
| 6658 |
return false; |
6658 |
return false; |
| 6659 |
|
6659 |
|
| 6660 |
// Skip loads we've already transformed. |
6660 |
// Skip loads we've already transformed. |
| 6661 |
if (Load->hasOneUse() && |
6661 |
if (Load->hasOneUse() && |
| 6662 |
InsertedInsts.count(cast(*Load->user_begin()))) |
6662 |
InsertedInsts.count(cast(*Load->user_begin()))) |
| 6663 |
return false; |
6663 |
return false; |
| 6664 |
|
6664 |
|
| 6665 |
// Look at all uses of Load, looking through phis, to determine how many bits |
6665 |
// Look at all uses of Load, looking through phis, to determine how many bits |
| 6666 |
// of the loaded value are needed. |
6666 |
// of the loaded value are needed. |
| 6667 |
SmallVector WorkList; |
6667 |
SmallVector WorkList; |
| 6668 |
SmallPtrSet Visited; |
6668 |
SmallPtrSet Visited; |
| 6669 |
SmallVector AndsToMaybeRemove; |
6669 |
SmallVector AndsToMaybeRemove; |
| 6670 |
for (auto *U : Load->users()) |
6670 |
for (auto *U : Load->users()) |
| 6671 |
WorkList.push_back(cast(U)); |
6671 |
WorkList.push_back(cast(U)); |
| 6672 |
|
6672 |
|
| 6673 |
EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); |
6673 |
EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); |
| 6674 |
unsigned BitWidth = LoadResultVT.getSizeInBits(); |
6674 |
unsigned BitWidth = LoadResultVT.getSizeInBits(); |
| 6675 |
// If the BitWidth is 0, do not try to optimize the type |
6675 |
// If the BitWidth is 0, do not try to optimize the type |
| 6676 |
if (BitWidth == 0) |
6676 |
if (BitWidth == 0) |
| 6677 |
return false; |
6677 |
return false; |
| 6678 |
|
6678 |
|
| 6679 |
APInt DemandBits(BitWidth, 0); |
6679 |
APInt DemandBits(BitWidth, 0); |
| 6680 |
APInt WidestAndBits(BitWidth, 0); |
6680 |
APInt WidestAndBits(BitWidth, 0); |
| 6681 |
|
6681 |
|
| 6682 |
while (!WorkList.empty()) { |
6682 |
while (!WorkList.empty()) { |
| 6683 |
Instruction *I = WorkList.pop_back_val(); |
6683 |
Instruction *I = WorkList.pop_back_val(); |
| 6684 |
|
6684 |
|
| 6685 |
// Break use-def graph loops. |
6685 |
// Break use-def graph loops. |
| 6686 |
if (!Visited.insert(I).second) |
6686 |
if (!Visited.insert(I).second) |
| 6687 |
continue; |
6687 |
continue; |
| 6688 |
|
6688 |
|
| 6689 |
// For a PHI node, push all of its users. |
6689 |
// For a PHI node, push all of its users. |
| 6690 |
if (auto *Phi = dyn_cast(I)) { |
6690 |
if (auto *Phi = dyn_cast(I)) { |
| 6691 |
for (auto *U : Phi->users()) |
6691 |
for (auto *U : Phi->users()) |
| 6692 |
WorkList.push_back(cast(U)); |
6692 |
WorkList.push_back(cast(U)); |
| 6693 |
continue; |
6693 |
continue; |
| 6694 |
} |
6694 |
} |
| 6695 |
|
6695 |
|
| 6696 |
switch (I->getOpcode()) { |
6696 |
switch (I->getOpcode()) { |
| 6697 |
case Instruction::And: { |
6697 |
case Instruction::And: { |
| 6698 |
auto *AndC = dyn_cast(I->getOperand(1)); |
6698 |
auto *AndC = dyn_cast(I->getOperand(1)); |
| 6699 |
if (!AndC) |
6699 |
if (!AndC) |
| 6700 |
return false; |
6700 |
return false; |
| 6701 |
APInt AndBits = AndC->getValue(); |
6701 |
APInt AndBits = AndC->getValue(); |
| 6702 |
DemandBits |= AndBits; |
6702 |
DemandBits |= AndBits; |
| 6703 |
// Keep track of the widest and mask we see. |
6703 |
// Keep track of the widest and mask we see. |
| 6704 |
if (AndBits.ugt(WidestAndBits)) |
6704 |
if (AndBits.ugt(WidestAndBits)) |
| 6705 |
WidestAndBits = AndBits; |
6705 |
WidestAndBits = AndBits; |
| 6706 |
if (AndBits == WidestAndBits && I->getOperand(0) == Load) |
6706 |
if (AndBits == WidestAndBits && I->getOperand(0) == Load) |
| 6707 |
AndsToMaybeRemove.push_back(I); |
6707 |
AndsToMaybeRemove.push_back(I); |
| 6708 |
break; |
6708 |
break; |
| 6709 |
} |
6709 |
} |
| 6710 |
|
6710 |
|
| 6711 |
case Instruction::Shl: { |
6711 |
case Instruction::Shl: { |
| 6712 |
auto *ShlC = dyn_cast(I->getOperand(1)); |
6712 |
auto *ShlC = dyn_cast(I->getOperand(1)); |
| 6713 |
if (!ShlC) |
6713 |
if (!ShlC) |
| 6714 |
return false; |
6714 |
return false; |
| 6715 |
uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); |
6715 |
uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); |
| 6716 |
DemandBits.setLowBits(BitWidth - ShiftAmt); |
6716 |
DemandBits.setLowBits(BitWidth - ShiftAmt); |
| 6717 |
break; |
6717 |
break; |
| 6718 |
} |
6718 |
} |
| 6719 |
|
6719 |
|
| 6720 |
case Instruction::Trunc: { |
6720 |
case Instruction::Trunc: { |
| 6721 |
EVT TruncVT = TLI->getValueType(*DL, I->getType()); |
6721 |
EVT TruncVT = TLI->getValueType(*DL, I->getType()); |
| 6722 |
unsigned TruncBitWidth = TruncVT.getSizeInBits(); |
6722 |
unsigned TruncBitWidth = TruncVT.getSizeInBits(); |
| 6723 |
DemandBits.setLowBits(TruncBitWidth); |
6723 |
DemandBits.setLowBits(TruncBitWidth); |
| 6724 |
break; |
6724 |
break; |
| 6725 |
} |
6725 |
} |
| 6726 |
|
6726 |
|
| 6727 |
default: |
6727 |
default: |
| 6728 |
return false; |
6728 |
return false; |
| 6729 |
} |
6729 |
} |
| 6730 |
} |
6730 |
} |
| 6731 |
|
6731 |
|
| 6732 |
uint32_t ActiveBits = DemandBits.getActiveBits(); |
6732 |
uint32_t ActiveBits = DemandBits.getActiveBits(); |
| 6733 |
// Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the |
6733 |
// Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the |
| 6734 |
// target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, |
6734 |
// target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, |
| 6735 |
// for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but |
6735 |
// for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but |
| 6736 |
// (and (load x) 1) is not matched as a single instruction, rather as a LDR |
6736 |
// (and (load x) 1) is not matched as a single instruction, rather as a LDR |
| 6737 |
// followed by an AND. |
6737 |
// followed by an AND. |
| 6738 |
// TODO: Look into removing this restriction by fixing backends to either |
6738 |
// TODO: Look into removing this restriction by fixing backends to either |
| 6739 |
// return false for isLoadExtLegal for i1 or have them select this pattern to |
6739 |
// return false for isLoadExtLegal for i1 or have them select this pattern to |
| 6740 |
// a single instruction. |
6740 |
// a single instruction. |
| 6741 |
// |
6741 |
// |
| 6742 |
// Also avoid hoisting if we didn't see any ands with the exact DemandBits |
6742 |
// Also avoid hoisting if we didn't see any ands with the exact DemandBits |
| 6743 |
// mask, since these are the only ands that will be removed by isel. |
6743 |
// mask, since these are the only ands that will be removed by isel. |
| 6744 |
if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) || |
6744 |
if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) || |
| 6745 |
WidestAndBits != DemandBits) |
6745 |
WidestAndBits != DemandBits) |
| 6746 |
return false; |
6746 |
return false; |
| 6747 |
|
6747 |
|
| 6748 |
LLVMContext &Ctx = Load->getType()->getContext(); |
6748 |
LLVMContext &Ctx = Load->getType()->getContext(); |
| 6749 |
Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); |
6749 |
Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); |
| 6750 |
EVT TruncVT = TLI->getValueType(*DL, TruncTy); |
6750 |
EVT TruncVT = TLI->getValueType(*DL, TruncTy); |
| 6751 |
|
6751 |
|
| 6752 |
// Reject cases that won't be matched as extloads. |
6752 |
// Reject cases that won't be matched as extloads. |
| 6753 |
if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || |
6753 |
if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || |
| 6754 |
!TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) |
6754 |
!TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) |
| 6755 |
return false; |
6755 |
return false; |
| 6756 |
|
6756 |
|
| 6757 |
IRBuilder<> Builder(Load->getNextNode()); |
6757 |
IRBuilder<> Builder(Load->getNextNode()); |
| 6758 |
auto *NewAnd = cast( |
6758 |
auto *NewAnd = cast( |
| 6759 |
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); |
6759 |
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); |
| 6760 |
// Mark this instruction as "inserted by CGP", so that other |
6760 |
// Mark this instruction as "inserted by CGP", so that other |
| 6761 |
// optimizations don't touch it. |
6761 |
// optimizations don't touch it. |
| 6762 |
InsertedInsts.insert(NewAnd); |
6762 |
InsertedInsts.insert(NewAnd); |
| 6763 |
|
6763 |
|
| 6764 |
// Replace all uses of load with new and (except for the use of load in the |
6764 |
// Replace all uses of load with new and (except for the use of load in the |
| 6765 |
// new and itself). |
6765 |
// new and itself). |
| 6766 |
replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc); |
6766 |
replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc); |
| 6767 |
NewAnd->setOperand(0, Load); |
6767 |
NewAnd->setOperand(0, Load); |
| 6768 |
|
6768 |
|
| 6769 |
// Remove any and instructions that are now redundant. |
6769 |
// Remove any and instructions that are now redundant. |
| 6770 |
for (auto *And : AndsToMaybeRemove) |
6770 |
for (auto *And : AndsToMaybeRemove) |
| 6771 |
// Check that the and mask is the same as the one we decided to put on the |
6771 |
// Check that the and mask is the same as the one we decided to put on the |
| 6772 |
// new and. |
6772 |
// new and. |
| 6773 |
if (cast(And->getOperand(1))->getValue() == DemandBits) { |
6773 |
if (cast(And->getOperand(1))->getValue() == DemandBits) { |
| 6774 |
replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc); |
6774 |
replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc); |
| 6775 |
if (&*CurInstIterator == And) |
6775 |
if (&*CurInstIterator == And) |
| 6776 |
CurInstIterator = std::next(And->getIterator()); |
6776 |
CurInstIterator = std::next(And->getIterator()); |
| 6777 |
And->eraseFromParent(); |
6777 |
And->eraseFromParent(); |
| 6778 |
++NumAndUses; |
6778 |
++NumAndUses; |
| 6779 |
} |
6779 |
} |
| 6780 |
|
6780 |
|
| 6781 |
++NumAndsAdded; |
6781 |
++NumAndsAdded; |
| 6782 |
return true; |
6782 |
return true; |
| 6783 |
} |
6783 |
} |
| 6784 |
|
6784 |
|
| 6785 |
/// Check if V (an operand of a select instruction) is an expensive instruction |
6785 |
/// Check if V (an operand of a select instruction) is an expensive instruction |
| 6786 |
/// that is only used once. |
6786 |
/// that is only used once. |
| 6787 |
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { |
6787 |
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { |
| 6788 |
auto *I = dyn_cast(V); |
6788 |
auto *I = dyn_cast(V); |
| 6789 |
// If it's safe to speculatively execute, then it should not have side |
6789 |
// If it's safe to speculatively execute, then it should not have side |
| 6790 |
// effects; therefore, it's safe to sink and possibly *not* execute. |
6790 |
// effects; therefore, it's safe to sink and possibly *not* execute. |
| 6791 |
return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && |
6791 |
return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && |
| 6792 |
TTI->isExpensiveToSpeculativelyExecute(I); |
6792 |
TTI->isExpensiveToSpeculativelyExecute(I); |
| 6793 |
} |
6793 |
} |
| 6794 |
|
6794 |
|
| 6795 |
/// Returns true if a SelectInst should be turned into an explicit branch. |
6795 |
/// Returns true if a SelectInst should be turned into an explicit branch. |
| 6796 |
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, |
6796 |
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, |
| 6797 |
const TargetLowering *TLI, |
6797 |
const TargetLowering *TLI, |
| 6798 |
SelectInst *SI) { |
6798 |
SelectInst *SI) { |
| 6799 |
// If even a predictable select is cheap, then a branch can't be cheaper. |
6799 |
// If even a predictable select is cheap, then a branch can't be cheaper. |
| 6800 |
if (!TLI->isPredictableSelectExpensive()) |
6800 |
if (!TLI->isPredictableSelectExpensive()) |
| 6801 |
return false; |
6801 |
return false; |
| 6802 |
|
6802 |
|
| 6803 |
// FIXME: This should use the same heuristics as IfConversion to determine |
6803 |
// FIXME: This should use the same heuristics as IfConversion to determine |
| 6804 |
// whether a select is better represented as a branch. |
6804 |
// whether a select is better represented as a branch. |
| 6805 |
|
6805 |
|
| 6806 |
// If metadata tells us that the select condition is obviously predictable, |
6806 |
// If metadata tells us that the select condition is obviously predictable, |
| 6807 |
// then we want to replace the select with a branch. |
6807 |
// then we want to replace the select with a branch. |
| 6808 |
uint64_t TrueWeight, FalseWeight; |
6808 |
uint64_t TrueWeight, FalseWeight; |
| 6809 |
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { |
6809 |
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { |
| 6810 |
uint64_t Max = std::max(TrueWeight, FalseWeight); |
6810 |
uint64_t Max = std::max(TrueWeight, FalseWeight); |
| 6811 |
uint64_t Sum = TrueWeight + FalseWeight; |
6811 |
uint64_t Sum = TrueWeight + FalseWeight; |
| 6812 |
if (Sum != 0) { |
6812 |
if (Sum != 0) { |
| 6813 |
auto Probability = BranchProbability::getBranchProbability(Max, Sum); |
6813 |
auto Probability = BranchProbability::getBranchProbability(Max, Sum); |
| 6814 |
if (Probability > TTI->getPredictableBranchThreshold()) |
6814 |
if (Probability > TTI->getPredictableBranchThreshold()) |
| 6815 |
return true; |
6815 |
return true; |
| 6816 |
} |
6816 |
} |
| 6817 |
} |
6817 |
} |
| 6818 |
|
6818 |
|
| 6819 |
CmpInst *Cmp = dyn_cast(SI->getCondition()); |
6819 |
CmpInst *Cmp = dyn_cast(SI->getCondition()); |
| 6820 |
|
6820 |
|
| 6821 |
// If a branch is predictable, an out-of-order CPU can avoid blocking on its |
6821 |
// If a branch is predictable, an out-of-order CPU can avoid blocking on its |
| 6822 |
// comparison condition. If the compare has more than one use, there's |
6822 |
// comparison condition. If the compare has more than one use, there's |
| 6823 |
// probably another cmov or setcc around, so it's not worth emitting a branch. |
6823 |
// probably another cmov or setcc around, so it's not worth emitting a branch. |
| 6824 |
if (!Cmp || !Cmp->hasOneUse()) |
6824 |
if (!Cmp || !Cmp->hasOneUse()) |
| 6825 |
return false; |
6825 |
return false; |
| 6826 |
|
6826 |
|
| 6827 |
// If either operand of the select is expensive and only needed on one side |
6827 |
// If either operand of the select is expensive and only needed on one side |
| 6828 |
// of the select, we should form a branch. |
6828 |
// of the select, we should form a branch. |
| 6829 |
if (sinkSelectOperand(TTI, SI->getTrueValue()) || |
6829 |
if (sinkSelectOperand(TTI, SI->getTrueValue()) || |
| 6830 |
sinkSelectOperand(TTI, SI->getFalseValue())) |
6830 |
sinkSelectOperand(TTI, SI->getFalseValue())) |
| 6831 |
return true; |
6831 |
return true; |
| 6832 |
|
6832 |
|
| 6833 |
return false; |
6833 |
return false; |
| 6834 |
} |
6834 |
} |
| 6835 |
|
6835 |
|
| 6836 |
/// If \p isTrue is true, return the true value of \p SI, otherwise return |
6836 |
/// If \p isTrue is true, return the true value of \p SI, otherwise return |
| 6837 |
/// false value of \p SI. If the true/false value of \p SI is defined by any |
6837 |
/// false value of \p SI. If the true/false value of \p SI is defined by any |
| 6838 |
/// select instructions in \p Selects, look through the defining select |
6838 |
/// select instructions in \p Selects, look through the defining select |
| 6839 |
/// instruction until the true/false value is not defined in \p Selects. |
6839 |
/// instruction until the true/false value is not defined in \p Selects. |
| 6840 |
static Value * |
6840 |
static Value * |
| 6841 |
getTrueOrFalseValue(SelectInst *SI, bool isTrue, |
6841 |
getTrueOrFalseValue(SelectInst *SI, bool isTrue, |
| 6842 |
const SmallPtrSet &Selects) { |
6842 |
const SmallPtrSet &Selects) { |
| 6843 |
Value *V = nullptr; |
6843 |
Value *V = nullptr; |
| 6844 |
|
6844 |
|
| 6845 |
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); |
6845 |
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); |
| 6846 |
DefSI = dyn_cast(V)) { |
6846 |
DefSI = dyn_cast(V)) { |
| 6847 |
assert(DefSI->getCondition() == SI->getCondition() && |
6847 |
assert(DefSI->getCondition() == SI->getCondition() && |
| 6848 |
"The condition of DefSI does not match with SI"); |
6848 |
"The condition of DefSI does not match with SI"); |
| 6849 |
V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); |
6849 |
V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); |
| 6850 |
} |
6850 |
} |
| 6851 |
|
6851 |
|
| 6852 |
assert(V && "Failed to get select true/false value"); |
6852 |
assert(V && "Failed to get select true/false value"); |
| 6853 |
return V; |
6853 |
return V; |
| 6854 |
} |
6854 |
} |
| 6855 |
|
6855 |
|
| 6856 |
bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { |
6856 |
bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { |
| 6857 |
assert(Shift->isShift() && "Expected a shift"); |
6857 |
assert(Shift->isShift() && "Expected a shift"); |
| 6858 |
|
6858 |
|
| 6859 |
// If this is (1) a vector shift, (2) shifts by scalars are cheaper than |
6859 |
// If this is (1) a vector shift, (2) shifts by scalars are cheaper than |
| 6860 |
// general vector shifts, and (3) the shift amount is a select-of-splatted |
6860 |
// general vector shifts, and (3) the shift amount is a select-of-splatted |
| 6861 |
// values, hoist the shifts before the select: |
6861 |
// values, hoist the shifts before the select: |
| 6862 |
// shift Op0, (select Cond, TVal, FVal) --> |
6862 |
// shift Op0, (select Cond, TVal, FVal) --> |
| 6863 |
// select Cond, (shift Op0, TVal), (shift Op0, FVal) |
6863 |
// select Cond, (shift Op0, TVal), (shift Op0, FVal) |
| 6864 |
// |
6864 |
// |
| 6865 |
// This is inverting a generic IR transform when we know that the cost of a |
6865 |
// This is inverting a generic IR transform when we know that the cost of a |
| 6866 |
// general vector shift is more than the cost of 2 shift-by-scalars. |
6866 |
// general vector shift is more than the cost of 2 shift-by-scalars. |
| 6867 |
// We can't do this effectively in SDAG because we may not be able to |
6867 |
// We can't do this effectively in SDAG because we may not be able to |
| 6868 |
// determine if the select operands are splats from within a basic block. |
6868 |
// determine if the select operands are splats from within a basic block. |
| 6869 |
Type *Ty = Shift->getType(); |
6869 |
Type *Ty = Shift->getType(); |
| 6870 |
if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) |
6870 |
if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) |
| 6871 |
return false; |
6871 |
return false; |
| 6872 |
Value *Cond, *TVal, *FVal; |
6872 |
Value *Cond, *TVal, *FVal; |
| 6873 |
if (!match(Shift->getOperand(1), |
6873 |
if (!match(Shift->getOperand(1), |
| 6874 |
m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) |
6874 |
m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) |
| 6875 |
return false; |
6875 |
return false; |
| 6876 |
if (!isSplatValue(TVal) || !isSplatValue(FVal)) |
6876 |
if (!isSplatValue(TVal) || !isSplatValue(FVal)) |
| 6877 |
return false; |
6877 |
return false; |
| 6878 |
|
6878 |
|
| 6879 |
IRBuilder<> Builder(Shift); |
6879 |
IRBuilder<> Builder(Shift); |
| 6880 |
BinaryOperator::BinaryOps Opcode = Shift->getOpcode(); |
6880 |
BinaryOperator::BinaryOps Opcode = Shift->getOpcode(); |
| 6881 |
Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal); |
6881 |
Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal); |
| 6882 |
Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal); |
6882 |
Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal); |
| 6883 |
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); |
6883 |
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); |
| 6884 |
replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc); |
6884 |
replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc); |
| 6885 |
Shift->eraseFromParent(); |
6885 |
Shift->eraseFromParent(); |
| 6886 |
return true; |
6886 |
return true; |
| 6887 |
} |
6887 |
} |
| 6888 |
|
6888 |
|
| 6889 |
bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { |
6889 |
bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) { |
| 6890 |
Intrinsic::ID Opcode = Fsh->getIntrinsicID(); |
6890 |
Intrinsic::ID Opcode = Fsh->getIntrinsicID(); |
| 6891 |
assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) && |
6891 |
assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) && |
| 6892 |
"Expected a funnel shift"); |
6892 |
"Expected a funnel shift"); |
| 6893 |
|
6893 |
|
| 6894 |
// If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper |
6894 |
// If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper |
| 6895 |
// than general vector shifts, and (3) the shift amount is select-of-splatted |
6895 |
// than general vector shifts, and (3) the shift amount is select-of-splatted |
| 6896 |
// values, hoist the funnel shifts before the select: |
6896 |
// values, hoist the funnel shifts before the select: |
| 6897 |
// fsh Op0, Op1, (select Cond, TVal, FVal) --> |
6897 |
// fsh Op0, Op1, (select Cond, TVal, FVal) --> |
| 6898 |
// select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal) |
6898 |
// select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal) |
| 6899 |
// |
6899 |
// |
| 6900 |
// This is inverting a generic IR transform when we know that the cost of a |
6900 |
// This is inverting a generic IR transform when we know that the cost of a |
| 6901 |
// general vector shift is more than the cost of 2 shift-by-scalars. |
6901 |
// general vector shift is more than the cost of 2 shift-by-scalars. |
| 6902 |
// We can't do this effectively in SDAG because we may not be able to |
6902 |
// We can't do this effectively in SDAG because we may not be able to |
| 6903 |
// determine if the select operands are splats from within a basic block. |
6903 |
// determine if the select operands are splats from within a basic block. |
| 6904 |
Type *Ty = Fsh->getType(); |
6904 |
Type *Ty = Fsh->getType(); |
| 6905 |
if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) |
6905 |
if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) |
| 6906 |
return false; |
6906 |
return false; |
| 6907 |
Value *Cond, *TVal, *FVal; |
6907 |
Value *Cond, *TVal, *FVal; |
| 6908 |
if (!match(Fsh->getOperand(2), |
6908 |
if (!match(Fsh->getOperand(2), |
| 6909 |
m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) |
6909 |
m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) |
| 6910 |
return false; |
6910 |
return false; |
| 6911 |
if (!isSplatValue(TVal) || !isSplatValue(FVal)) |
6911 |
if (!isSplatValue(TVal) || !isSplatValue(FVal)) |
| 6912 |
return false; |
6912 |
return false; |
| 6913 |
|
6913 |
|
| 6914 |
IRBuilder<> Builder(Fsh); |
6914 |
IRBuilder<> Builder(Fsh); |
| 6915 |
Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1); |
6915 |
Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1); |
| 6916 |
Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal}); |
6916 |
Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal}); |
| 6917 |
Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal}); |
6917 |
Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal}); |
| 6918 |
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); |
6918 |
Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); |
| 6919 |
replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc); |
6919 |
replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc); |
| 6920 |
Fsh->eraseFromParent(); |
6920 |
Fsh->eraseFromParent(); |
| 6921 |
return true; |
6921 |
return true; |
| 6922 |
} |
6922 |
} |
| 6923 |
|
6923 |
|
| 6924 |
/// If we have a SelectInst that will likely profit from branch prediction, |
6924 |
/// If we have a SelectInst that will likely profit from branch prediction, |
| 6925 |
/// turn it into a branch. |
6925 |
/// turn it into a branch. |
| 6926 |
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { |
6926 |
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { |
| 6927 |
if (DisableSelectToBranch) |
6927 |
if (DisableSelectToBranch) |
| 6928 |
return false; |
6928 |
return false; |
| 6929 |
|
6929 |
|
| 6930 |
// If the SelectOptimize pass is enabled, selects have already been optimized. |
6930 |
// If the SelectOptimize pass is enabled, selects have already been optimized. |
| 6931 |
if (!getCGPassBuilderOption().DisableSelectOptimize) |
6931 |
if (!getCGPassBuilderOption().DisableSelectOptimize) |
| 6932 |
return false; |
6932 |
return false; |
| 6933 |
|
6933 |
|
| 6934 |
// Find all consecutive select instructions that share the same condition. |
6934 |
// Find all consecutive select instructions that share the same condition. |
| 6935 |
SmallVector ASI; |
6935 |
SmallVector ASI; |
| 6936 |
ASI.push_back(SI); |
6936 |
ASI.push_back(SI); |
| 6937 |
for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); |
6937 |
for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); |
| 6938 |
It != SI->getParent()->end(); ++It) { |
6938 |
It != SI->getParent()->end(); ++It) { |
| 6939 |
SelectInst *I = dyn_cast(&*It); |
6939 |
SelectInst *I = dyn_cast(&*It); |
| 6940 |
if (I && SI->getCondition() == I->getCondition()) { |
6940 |
if (I && SI->getCondition() == I->getCondition()) { |
| 6941 |
ASI.push_back(I); |
6941 |
ASI.push_back(I); |
| 6942 |
} else { |
6942 |
} else { |
| 6943 |
break; |
6943 |
break; |
| 6944 |
} |
6944 |
} |
| 6945 |
} |
6945 |
} |
| 6946 |
|
6946 |
|
| 6947 |
SelectInst *LastSI = ASI.back(); |
6947 |
SelectInst *LastSI = ASI.back(); |
| 6948 |
// Increment the current iterator to skip all the rest of select instructions |
6948 |
// Increment the current iterator to skip all the rest of select instructions |
| 6949 |
// because they will be either "not lowered" or "all lowered" to branch. |
6949 |
// because they will be either "not lowered" or "all lowered" to branch. |
| 6950 |
CurInstIterator = std::next(LastSI->getIterator()); |
6950 |
CurInstIterator = std::next(LastSI->getIterator()); |
| 6951 |
|
6951 |
|
| 6952 |
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); |
6952 |
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); |
| 6953 |
|
6953 |
|
| 6954 |
// Can we convert the 'select' to CF ? |
6954 |
// Can we convert the 'select' to CF ? |
| 6955 |
if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable)) |
6955 |
if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable)) |
| 6956 |
return false; |
6956 |
return false; |
| 6957 |
|
6957 |
|
| 6958 |
TargetLowering::SelectSupportKind SelectKind; |
6958 |
TargetLowering::SelectSupportKind SelectKind; |
| 6959 |
if (SI->getType()->isVectorTy()) |
6959 |
if (SI->getType()->isVectorTy()) |
| 6960 |
SelectKind = TargetLowering::ScalarCondVectorVal; |
6960 |
SelectKind = TargetLowering::ScalarCondVectorVal; |
| 6961 |
else |
6961 |
else |
| 6962 |
SelectKind = TargetLowering::ScalarValSelect; |
6962 |
SelectKind = TargetLowering::ScalarValSelect; |
| 6963 |
|
6963 |
|
| 6964 |
if (TLI->isSelectSupported(SelectKind) && |
6964 |
if (TLI->isSelectSupported(SelectKind) && |
| 6965 |
(!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize || |
6965 |
(!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize || |
| 6966 |
llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))) |
6966 |
llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))) |
| 6967 |
return false; |
6967 |
return false; |
| 6968 |
|
6968 |
|
| 6969 |
// The DominatorTree needs to be rebuilt by any consumers after this |
6969 |
// The DominatorTree needs to be rebuilt by any consumers after this |
| 6970 |
// transformation. We simply reset here rather than setting the ModifiedDT |
6970 |
// transformation. We simply reset here rather than setting the ModifiedDT |
| 6971 |
// flag to avoid restarting the function walk in runOnFunction for each |
6971 |
// flag to avoid restarting the function walk in runOnFunction for each |
| 6972 |
// select optimized. |
6972 |
// select optimized. |
| 6973 |
DT.reset(); |
6973 |
DT.reset(); |
| 6974 |
|
6974 |
|
| 6975 |
// Transform a sequence like this: |
6975 |
// Transform a sequence like this: |
| 6976 |
// start: |
6976 |
// start: |
| 6977 |
// %cmp = cmp uge i32 %a, %b |
6977 |
// %cmp = cmp uge i32 %a, %b |
| 6978 |
// %sel = select i1 %cmp, i32 %c, i32 %d |
6978 |
// %sel = select i1 %cmp, i32 %c, i32 %d |
| 6979 |
// |
6979 |
// |
| 6980 |
// Into: |
6980 |
// Into: |
| 6981 |
// start: |
6981 |
// start: |
| 6982 |
// %cmp = cmp uge i32 %a, %b |
6982 |
// %cmp = cmp uge i32 %a, %b |
| 6983 |
// %cmp.frozen = freeze %cmp |
6983 |
// %cmp.frozen = freeze %cmp |
| 6984 |
// br i1 %cmp.frozen, label %select.true, label %select.false |
6984 |
// br i1 %cmp.frozen, label %select.true, label %select.false |
| 6985 |
// select.true: |
6985 |
// select.true: |
| 6986 |
// br label %select.end |
6986 |
// br label %select.end |
| 6987 |
// select.false: |
6987 |
// select.false: |
| 6988 |
// br label %select.end |
6988 |
// br label %select.end |
| 6989 |
// select.end: |
6989 |
// select.end: |
| 6990 |
// %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] |
6990 |
// %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] |
| 6991 |
// |
6991 |
// |
| 6992 |
// %cmp should be frozen, otherwise it may introduce undefined behavior. |
6992 |
// %cmp should be frozen, otherwise it may introduce undefined behavior. |
| 6993 |
// In addition, we may sink instructions that produce %c or %d from |
6993 |
// In addition, we may sink instructions that produce %c or %d from |
| 6994 |
// the entry block into the destination(s) of the new branch. |
6994 |
// the entry block into the destination(s) of the new branch. |
| 6995 |
// If the true or false blocks do not contain a sunken instruction, that |
6995 |
// If the true or false blocks do not contain a sunken instruction, that |
| 6996 |
// block and its branch may be optimized away. In that case, one side of the |
6996 |
// block and its branch may be optimized away. In that case, one side of the |
| 6997 |
// first branch will point directly to select.end, and the corresponding PHI |
6997 |
// first branch will point directly to select.end, and the corresponding PHI |
| 6998 |
// predecessor block will be the start block. |
6998 |
// predecessor block will be the start block. |
| 6999 |
|
6999 |
|
| 7000 |
// Collect values that go on the true side and the values that go on the false |
7000 |
// Collect values that go on the true side and the values that go on the false |
| 7001 |
// side. |
7001 |
// side. |
| 7002 |
SmallVector TrueInstrs, FalseInstrs; |
7002 |
SmallVector TrueInstrs, FalseInstrs; |
| 7003 |
for (SelectInst *SI : ASI) { |
7003 |
for (SelectInst *SI : ASI) { |
| 7004 |
if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V)) |
7004 |
if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V)) |
| 7005 |
TrueInstrs.push_back(cast(V)); |
7005 |
TrueInstrs.push_back(cast(V)); |
| 7006 |
if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V)) |
7006 |
if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V)) |
| 7007 |
FalseInstrs.push_back(cast(V)); |
7007 |
FalseInstrs.push_back(cast(V)); |
| 7008 |
} |
7008 |
} |
| 7009 |
|
7009 |
|
| 7010 |
// Split the select block, according to how many (if any) values go on each |
7010 |
// Split the select block, according to how many (if any) values go on each |
| 7011 |
// side. |
7011 |
// side. |
| 7012 |
BasicBlock *StartBlock = SI->getParent(); |
7012 |
BasicBlock *StartBlock = SI->getParent(); |
| 7013 |
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); |
7013 |
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); |
| 7014 |
|
7014 |
|
| 7015 |
IRBuilder<> IB(SI); |
7015 |
IRBuilder<> IB(SI); |
| 7016 |
auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); |
7016 |
auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); |
| 7017 |
|
7017 |
|
| 7018 |
BasicBlock *TrueBlock = nullptr; |
7018 |
BasicBlock *TrueBlock = nullptr; |
| 7019 |
BasicBlock *FalseBlock = nullptr; |
7019 |
BasicBlock *FalseBlock = nullptr; |
| 7020 |
BasicBlock *EndBlock = nullptr; |
7020 |
BasicBlock *EndBlock = nullptr; |
| 7021 |
BranchInst *TrueBranch = nullptr; |
7021 |
BranchInst *TrueBranch = nullptr; |
| 7022 |
BranchInst *FalseBranch = nullptr; |
7022 |
BranchInst *FalseBranch = nullptr; |
| 7023 |
if (TrueInstrs.size() == 0) { |
7023 |
if (TrueInstrs.size() == 0) { |
| 7024 |
FalseBranch = cast(SplitBlockAndInsertIfElse( |
7024 |
FalseBranch = cast(SplitBlockAndInsertIfElse( |
| 7025 |
CondFr, &*SplitPt, false, nullptr, nullptr, LI)); |
7025 |
CondFr, &*SplitPt, false, nullptr, nullptr, LI)); |
| 7026 |
FalseBlock = FalseBranch->getParent(); |
7026 |
FalseBlock = FalseBranch->getParent(); |
| 7027 |
EndBlock = cast(FalseBranch->getOperand(0)); |
7027 |
EndBlock = cast(FalseBranch->getOperand(0)); |
| 7028 |
} else if (FalseInstrs.size() == 0) { |
7028 |
} else if (FalseInstrs.size() == 0) { |
| 7029 |
TrueBranch = cast(SplitBlockAndInsertIfThen( |
7029 |
TrueBranch = cast(SplitBlockAndInsertIfThen( |
| 7030 |
CondFr, &*SplitPt, false, nullptr, nullptr, LI)); |
7030 |
CondFr, &*SplitPt, false, nullptr, nullptr, LI)); |
| 7031 |
TrueBlock = TrueBranch->getParent(); |
7031 |
TrueBlock = TrueBranch->getParent(); |
| 7032 |
EndBlock = cast(TrueBranch->getOperand(0)); |
7032 |
EndBlock = cast(TrueBranch->getOperand(0)); |
| 7033 |
} else { |
7033 |
} else { |
| 7034 |
Instruction *ThenTerm = nullptr; |
7034 |
Instruction *ThenTerm = nullptr; |
| 7035 |
Instruction *ElseTerm = nullptr; |
7035 |
Instruction *ElseTerm = nullptr; |
| 7036 |
SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm, |
7036 |
SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm, |
| 7037 |
nullptr, nullptr, LI); |
7037 |
nullptr, nullptr, LI); |
| 7038 |
TrueBranch = cast(ThenTerm); |
7038 |
TrueBranch = cast(ThenTerm); |
| 7039 |
FalseBranch = cast(ElseTerm); |
7039 |
FalseBranch = cast(ElseTerm); |
| 7040 |
TrueBlock = TrueBranch->getParent(); |
7040 |
TrueBlock = TrueBranch->getParent(); |
| 7041 |
FalseBlock = FalseBranch->getParent(); |
7041 |
FalseBlock = FalseBranch->getParent(); |
| 7042 |
EndBlock = cast(TrueBranch->getOperand(0)); |
7042 |
EndBlock = cast(TrueBranch->getOperand(0)); |
| 7043 |
} |
7043 |
} |
| 7044 |
|
7044 |
|
| 7045 |
EndBlock->setName("select.end"); |
7045 |
EndBlock->setName("select.end"); |
| 7046 |
if (TrueBlock) |
7046 |
if (TrueBlock) |
| 7047 |
TrueBlock->setName("select.true.sink"); |
7047 |
TrueBlock->setName("select.true.sink"); |
| 7048 |
if (FalseBlock) |
7048 |
if (FalseBlock) |
| 7049 |
FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false" |
7049 |
FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false" |
| 7050 |
: "select.false.sink"); |
7050 |
: "select.false.sink"); |
| 7051 |
|
7051 |
|
| 7052 |
if (IsHugeFunc) { |
7052 |
if (IsHugeFunc) { |
| 7053 |
if (TrueBlock) |
7053 |
if (TrueBlock) |
| 7054 |
FreshBBs.insert(TrueBlock); |
7054 |
FreshBBs.insert(TrueBlock); |
| 7055 |
if (FalseBlock) |
7055 |
if (FalseBlock) |
| 7056 |
FreshBBs.insert(FalseBlock); |
7056 |
FreshBBs.insert(FalseBlock); |
| 7057 |
FreshBBs.insert(EndBlock); |
7057 |
FreshBBs.insert(EndBlock); |
| 7058 |
} |
7058 |
} |
| 7059 |
|
7059 |
|
| 7060 |
BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); |
7060 |
BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); |
| 7061 |
|
7061 |
|
| 7062 |
static const unsigned MD[] = { |
7062 |
static const unsigned MD[] = { |
| 7063 |
LLVMContext::MD_prof, LLVMContext::MD_unpredictable, |
7063 |
LLVMContext::MD_prof, LLVMContext::MD_unpredictable, |
| 7064 |
LLVMContext::MD_make_implicit, LLVMContext::MD_dbg}; |
7064 |
LLVMContext::MD_make_implicit, LLVMContext::MD_dbg}; |
| 7065 |
StartBlock->getTerminator()->copyMetadata(*SI, MD); |
7065 |
StartBlock->getTerminator()->copyMetadata(*SI, MD); |
| 7066 |
|
7066 |
|
| 7067 |
// Sink expensive instructions into the conditional blocks to avoid executing |
7067 |
// Sink expensive instructions into the conditional blocks to avoid executing |
| 7068 |
// them speculatively. |
7068 |
// them speculatively. |
| 7069 |
for (Instruction *I : TrueInstrs) |
7069 |
for (Instruction *I : TrueInstrs) |
| 7070 |
I->moveBefore(TrueBranch); |
7070 |
I->moveBefore(TrueBranch); |
| 7071 |
for (Instruction *I : FalseInstrs) |
7071 |
for (Instruction *I : FalseInstrs) |
| 7072 |
I->moveBefore(FalseBranch); |
7072 |
I->moveBefore(FalseBranch); |
| 7073 |
|
7073 |
|
| 7074 |
// If we did not create a new block for one of the 'true' or 'false' paths |
7074 |
// If we did not create a new block for one of the 'true' or 'false' paths |
| 7075 |
// of the condition, it means that side of the branch goes to the end block |
7075 |
// of the condition, it means that side of the branch goes to the end block |
| 7076 |
// directly and the path originates from the start block from the point of |
7076 |
// directly and the path originates from the start block from the point of |
| 7077 |
// view of the new PHI. |
7077 |
// view of the new PHI. |
| 7078 |
if (TrueBlock == nullptr) |
7078 |
if (TrueBlock == nullptr) |
| 7079 |
TrueBlock = StartBlock; |
7079 |
TrueBlock = StartBlock; |
| 7080 |
else if (FalseBlock == nullptr) |
7080 |
else if (FalseBlock == nullptr) |
| 7081 |
FalseBlock = StartBlock; |
7081 |
FalseBlock = StartBlock; |
| 7082 |
|
7082 |
|
| 7083 |
SmallPtrSet INS; |
7083 |
SmallPtrSet INS; |
| 7084 |
INS.insert(ASI.begin(), ASI.end()); |
7084 |
INS.insert(ASI.begin(), ASI.end()); |
| 7085 |
// Use reverse iterator because later select may use the value of the |
7085 |
// Use reverse iterator because later select may use the value of the |
| 7086 |
// earlier select, and we need to propagate value through earlier select |
7086 |
// earlier select, and we need to propagate value through earlier select |
| 7087 |
// to get the PHI operand. |
7087 |
// to get the PHI operand. |
| 7088 |
for (SelectInst *SI : llvm::reverse(ASI)) { |
7088 |
for (SelectInst *SI : llvm::reverse(ASI)) { |
| 7089 |
// The select itself is replaced with a PHI Node. |
7089 |
// The select itself is replaced with a PHI Node. |
| 7090 |
PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); |
7090 |
PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); |
| 7091 |
PN->takeName(SI); |
7091 |
PN->takeName(SI); |
| 7092 |
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); |
7092 |
PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); |
| 7093 |
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); |
7093 |
PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); |
| 7094 |
PN->setDebugLoc(SI->getDebugLoc()); |
7094 |
PN->setDebugLoc(SI->getDebugLoc()); |
| 7095 |
|
7095 |
|
| 7096 |
replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc); |
7096 |
replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc); |
| 7097 |
SI->eraseFromParent(); |
7097 |
SI->eraseFromParent(); |
| 7098 |
INS.erase(SI); |
7098 |
INS.erase(SI); |
| 7099 |
++NumSelectsExpanded; |
7099 |
++NumSelectsExpanded; |
| 7100 |
} |
7100 |
} |
| 7101 |
|
7101 |
|
| 7102 |
// Instruct OptimizeBlock to skip to the next block. |
7102 |
// Instruct OptimizeBlock to skip to the next block. |
| 7103 |
CurInstIterator = StartBlock->end(); |
7103 |
CurInstIterator = StartBlock->end(); |
| 7104 |
return true; |
7104 |
return true; |
| 7105 |
} |
7105 |
} |
| 7106 |
|
7106 |
|
| 7107 |
/// Some targets only accept certain types for splat inputs. For example a VDUP |
7107 |
/// Some targets only accept certain types for splat inputs. For example a VDUP |
| 7108 |
/// in MVE takes a GPR (integer) register, and the instruction that incorporate |
7108 |
/// in MVE takes a GPR (integer) register, and the instruction that incorporate |
| 7109 |
/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. |
7109 |
/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register. |
| 7110 |
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { |
7110 |
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { |
| 7111 |
// Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only |
7111 |
// Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only |
| 7112 |
if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), |
7112 |
if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), |
| 7113 |
m_Undef(), m_ZeroMask()))) |
7113 |
m_Undef(), m_ZeroMask()))) |
| 7114 |
return false; |
7114 |
return false; |
| 7115 |
Type *NewType = TLI->shouldConvertSplatType(SVI); |
7115 |
Type *NewType = TLI->shouldConvertSplatType(SVI); |
| 7116 |
if (!NewType) |
7116 |
if (!NewType) |
| 7117 |
return false; |
7117 |
return false; |
| 7118 |
|
7118 |
|
| 7119 |
auto *SVIVecType = cast(SVI->getType()); |
7119 |
auto *SVIVecType = cast(SVI->getType()); |
| 7120 |
assert(!NewType->isVectorTy() && "Expected a scalar type!"); |
7120 |
assert(!NewType->isVectorTy() && "Expected a scalar type!"); |
| 7121 |
assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() && |
7121 |
assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() && |
| 7122 |
"Expected a type of the same size!"); |
7122 |
"Expected a type of the same size!"); |
| 7123 |
auto *NewVecType = |
7123 |
auto *NewVecType = |
| 7124 |
FixedVectorType::get(NewType, SVIVecType->getNumElements()); |
7124 |
FixedVectorType::get(NewType, SVIVecType->getNumElements()); |
| 7125 |
|
7125 |
|
| 7126 |
// Create a bitcast (shuffle (insert (bitcast(..)))) |
7126 |
// Create a bitcast (shuffle (insert (bitcast(..)))) |
| 7127 |
IRBuilder<> Builder(SVI->getContext()); |
7127 |
IRBuilder<> Builder(SVI->getContext()); |
| 7128 |
Builder.SetInsertPoint(SVI); |
7128 |
Builder.SetInsertPoint(SVI); |
| 7129 |
Value *BC1 = Builder.CreateBitCast( |
7129 |
Value *BC1 = Builder.CreateBitCast( |
| 7130 |
cast(SVI->getOperand(0))->getOperand(1), NewType); |
7130 |
cast(SVI->getOperand(0))->getOperand(1), NewType); |
| 7131 |
Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1); |
7131 |
Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1); |
| 7132 |
Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); |
7132 |
Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType); |
| 7133 |
|
7133 |
|
| 7134 |
replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc); |
7134 |
replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc); |
| 7135 |
RecursivelyDeleteTriviallyDeadInstructions( |
7135 |
RecursivelyDeleteTriviallyDeadInstructions( |
| 7136 |
SVI, TLInfo, nullptr, |
7136 |
SVI, TLInfo, nullptr, |
| 7137 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
7137 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
| 7138 |
|
7138 |
|
| 7139 |
// Also hoist the bitcast up to its operand if it they are not in the same |
7139 |
// Also hoist the bitcast up to its operand if it they are not in the same |
| 7140 |
// block. |
7140 |
// block. |
| 7141 |
if (auto *BCI = dyn_cast(BC1)) |
7141 |
if (auto *BCI = dyn_cast(BC1)) |
| 7142 |
if (auto *Op = dyn_cast(BCI->getOperand(0))) |
7142 |
if (auto *Op = dyn_cast(BCI->getOperand(0))) |
| 7143 |
if (BCI->getParent() != Op->getParent() && !isa(Op) && |
7143 |
if (BCI->getParent() != Op->getParent() && !isa(Op) && |
| 7144 |
!Op->isTerminator() && !Op->isEHPad()) |
7144 |
!Op->isTerminator() && !Op->isEHPad()) |
| 7145 |
BCI->moveAfter(Op); |
7145 |
BCI->moveAfter(Op); |
| 7146 |
|
7146 |
|
| 7147 |
return true; |
7147 |
return true; |
| 7148 |
} |
7148 |
} |
| 7149 |
|
7149 |
|
| 7150 |
bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { |
7150 |
bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { |
| 7151 |
// If the operands of I can be folded into a target instruction together with |
7151 |
// If the operands of I can be folded into a target instruction together with |
| 7152 |
// I, duplicate and sink them. |
7152 |
// I, duplicate and sink them. |
| 7153 |
SmallVector |
7153 |
SmallVector |
| 7154 |
if (!TLI->shouldSinkOperands(I, OpsToSink)) |
7154 |
if (!TLI->shouldSinkOperands(I, OpsToSink)) |
| 7155 |
return false; |
7155 |
return false; |
| 7156 |
|
7156 |
|
| 7157 |
// OpsToSink can contain multiple uses in a use chain (e.g. |
7157 |
// OpsToSink can contain multiple uses in a use chain (e.g. |
| 7158 |
// (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating |
7158 |
// (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating |
| 7159 |
// uses must come first, so we process the ops in reverse order so as to not |
7159 |
// uses must come first, so we process the ops in reverse order so as to not |
| 7160 |
// create invalid IR. |
7160 |
// create invalid IR. |
| 7161 |
BasicBlock *TargetBB = I->getParent(); |
7161 |
BasicBlock *TargetBB = I->getParent(); |
| 7162 |
bool Changed = false; |
7162 |
bool Changed = false; |
| 7163 |
SmallVector |
7163 |
SmallVector |
| 7164 |
Instruction *InsertPoint = I; |
7164 |
Instruction *InsertPoint = I; |
| 7165 |
DenseMap InstOrdering; |
7165 |
DenseMap InstOrdering; |
| 7166 |
unsigned long InstNumber = 0; |
7166 |
unsigned long InstNumber = 0; |
| 7167 |
for (const auto &I : *TargetBB) |
7167 |
for (const auto &I : *TargetBB) |
| 7168 |
InstOrdering[&I] = InstNumber++; |
7168 |
InstOrdering[&I] = InstNumber++; |
| 7169 |
|
7169 |
|
| 7170 |
for (Use *U : reverse(OpsToSink)) { |
7170 |
for (Use *U : reverse(OpsToSink)) { |
| 7171 |
auto *UI = cast(U->get()); |
7171 |
auto *UI = cast(U->get()); |
| 7172 |
if (isa(UI)) |
7172 |
if (isa(UI)) |
| 7173 |
continue; |
7173 |
continue; |
| 7174 |
if (UI->getParent() == TargetBB) { |
7174 |
if (UI->getParent() == TargetBB) { |
| 7175 |
if (InstOrdering[UI] < InstOrdering[InsertPoint]) |
7175 |
if (InstOrdering[UI] < InstOrdering[InsertPoint]) |
| 7176 |
InsertPoint = UI; |
7176 |
InsertPoint = UI; |
| 7177 |
continue; |
7177 |
continue; |
| 7178 |
} |
7178 |
} |
| 7179 |
ToReplace.push_back(U); |
7179 |
ToReplace.push_back(U); |
| 7180 |
} |
7180 |
} |
| 7181 |
|
7181 |
|
| 7182 |
SetVector MaybeDead; |
7182 |
SetVector MaybeDead; |
| 7183 |
DenseMap NewInstructions; |
7183 |
DenseMap NewInstructions; |
| 7184 |
for (Use *U : ToReplace) { |
7184 |
for (Use *U : ToReplace) { |
| 7185 |
auto *UI = cast(U->get()); |
7185 |
auto *UI = cast(U->get()); |
| 7186 |
Instruction *NI = UI->clone(); |
7186 |
Instruction *NI = UI->clone(); |
| 7187 |
|
7187 |
|
| 7188 |
if (IsHugeFunc) { |
7188 |
if (IsHugeFunc) { |
| 7189 |
// Now we clone an instruction, its operands' defs may sink to this BB |
7189 |
// Now we clone an instruction, its operands' defs may sink to this BB |
| 7190 |
// now. So we put the operands defs' BBs into FreshBBs to do optimization. |
7190 |
// now. So we put the operands defs' BBs into FreshBBs to do optimization. |
| 7191 |
for (unsigned I = 0; I < NI->getNumOperands(); ++I) { |
7191 |
for (unsigned I = 0; I < NI->getNumOperands(); ++I) { |
| 7192 |
auto *OpDef = dyn_cast(NI->getOperand(I)); |
7192 |
auto *OpDef = dyn_cast(NI->getOperand(I)); |
| 7193 |
if (!OpDef) |
7193 |
if (!OpDef) |
| 7194 |
continue; |
7194 |
continue; |
| 7195 |
FreshBBs.insert(OpDef->getParent()); |
7195 |
FreshBBs.insert(OpDef->getParent()); |
| 7196 |
} |
7196 |
} |
| 7197 |
} |
7197 |
} |
| 7198 |
|
7198 |
|
| 7199 |
NewInstructions[UI] = NI; |
7199 |
NewInstructions[UI] = NI; |
| 7200 |
MaybeDead.insert(UI); |
7200 |
MaybeDead.insert(UI); |
| 7201 |
LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); |
7201 |
LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); |
| 7202 |
NI->insertBefore(InsertPoint); |
7202 |
NI->insertBefore(InsertPoint); |
| 7203 |
InsertPoint = NI; |
7203 |
InsertPoint = NI; |
| 7204 |
InsertedInsts.insert(NI); |
7204 |
InsertedInsts.insert(NI); |
| 7205 |
|
7205 |
|
| 7206 |
// Update the use for the new instruction, making sure that we update the |
7206 |
// Update the use for the new instruction, making sure that we update the |
| 7207 |
// sunk instruction uses, if it is part of a chain that has already been |
7207 |
// sunk instruction uses, if it is part of a chain that has already been |
| 7208 |
// sunk. |
7208 |
// sunk. |
| 7209 |
Instruction *OldI = cast(U->getUser()); |
7209 |
Instruction *OldI = cast(U->getUser()); |
| 7210 |
if (NewInstructions.count(OldI)) |
7210 |
if (NewInstructions.count(OldI)) |
| 7211 |
NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); |
7211 |
NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); |
| 7212 |
else |
7212 |
else |
| 7213 |
U->set(NI); |
7213 |
U->set(NI); |
| 7214 |
Changed = true; |
7214 |
Changed = true; |
| 7215 |
} |
7215 |
} |
| 7216 |
|
7216 |
|
| 7217 |
// Remove instructions that are dead after sinking. |
7217 |
// Remove instructions that are dead after sinking. |
| 7218 |
for (auto *I : MaybeDead) { |
7218 |
for (auto *I : MaybeDead) { |
| 7219 |
if (!I->hasNUsesOrMore(1)) { |
7219 |
if (!I->hasNUsesOrMore(1)) { |
| 7220 |
LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n"); |
7220 |
LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n"); |
| 7221 |
I->eraseFromParent(); |
7221 |
I->eraseFromParent(); |
| 7222 |
} |
7222 |
} |
| 7223 |
} |
7223 |
} |
| 7224 |
|
7224 |
|
| 7225 |
return Changed; |
7225 |
return Changed; |
| 7226 |
} |
7226 |
} |
| 7227 |
|
7227 |
|
| 7228 |
bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) { |
7228 |
bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) { |
| 7229 |
Value *Cond = SI->getCondition(); |
7229 |
Value *Cond = SI->getCondition(); |
| 7230 |
Type *OldType = Cond->getType(); |
7230 |
Type *OldType = Cond->getType(); |
| 7231 |
LLVMContext &Context = Cond->getContext(); |
7231 |
LLVMContext &Context = Cond->getContext(); |
| 7232 |
EVT OldVT = TLI->getValueType(*DL, OldType); |
7232 |
EVT OldVT = TLI->getValueType(*DL, OldType); |
| 7233 |
MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT); |
7233 |
MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT); |
| 7234 |
unsigned RegWidth = RegType.getSizeInBits(); |
7234 |
unsigned RegWidth = RegType.getSizeInBits(); |
| 7235 |
|
7235 |
|
| 7236 |
if (RegWidth <= cast(OldType)->getBitWidth()) |
7236 |
if (RegWidth <= cast(OldType)->getBitWidth()) |
| 7237 |
return false; |
7237 |
return false; |
| 7238 |
|
7238 |
|
| 7239 |
// If the register width is greater than the type width, expand the condition |
7239 |
// If the register width is greater than the type width, expand the condition |
| 7240 |
// of the switch instruction and each case constant to the width of the |
7240 |
// of the switch instruction and each case constant to the width of the |
| 7241 |
// register. By widening the type of the switch condition, subsequent |
7241 |
// register. By widening the type of the switch condition, subsequent |
| 7242 |
// comparisons (for case comparisons) will not need to be extended to the |
7242 |
// comparisons (for case comparisons) will not need to be extended to the |
| 7243 |
// preferred register width, so we will potentially eliminate N-1 extends, |
7243 |
// preferred register width, so we will potentially eliminate N-1 extends, |
| 7244 |
// where N is the number of cases in the switch. |
7244 |
// where N is the number of cases in the switch. |
| 7245 |
auto *NewType = Type::getIntNTy(Context, RegWidth); |
7245 |
auto *NewType = Type::getIntNTy(Context, RegWidth); |
| 7246 |
|
7246 |
|
| 7247 |
// Extend the switch condition and case constants using the target preferred |
7247 |
// Extend the switch condition and case constants using the target preferred |
| 7248 |
// extend unless the switch condition is a function argument with an extend |
7248 |
// extend unless the switch condition is a function argument with an extend |
| 7249 |
// attribute. In that case, we can avoid an unnecessary mask/extension by |
7249 |
// attribute. In that case, we can avoid an unnecessary mask/extension by |
| 7250 |
// matching the argument extension instead. |
7250 |
// matching the argument extension instead. |
| 7251 |
Instruction::CastOps ExtType = Instruction::ZExt; |
7251 |
Instruction::CastOps ExtType = Instruction::ZExt; |
| 7252 |
// Some targets prefer SExt over ZExt. |
7252 |
// Some targets prefer SExt over ZExt. |
| 7253 |
if (TLI->isSExtCheaperThanZExt(OldVT, RegType)) |
7253 |
if (TLI->isSExtCheaperThanZExt(OldVT, RegType)) |
| 7254 |
ExtType = Instruction::SExt; |
7254 |
ExtType = Instruction::SExt; |
| 7255 |
|
7255 |
|
| 7256 |
if (auto *Arg = dyn_cast(Cond)) { |
7256 |
if (auto *Arg = dyn_cast(Cond)) { |
| 7257 |
if (Arg->hasSExtAttr()) |
7257 |
if (Arg->hasSExtAttr()) |
| 7258 |
ExtType = Instruction::SExt; |
7258 |
ExtType = Instruction::SExt; |
| 7259 |
if (Arg->hasZExtAttr()) |
7259 |
if (Arg->hasZExtAttr()) |
| 7260 |
ExtType = Instruction::ZExt; |
7260 |
ExtType = Instruction::ZExt; |
| 7261 |
} |
7261 |
} |
| 7262 |
|
7262 |
|
| 7263 |
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); |
7263 |
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); |
| 7264 |
ExtInst->insertBefore(SI); |
7264 |
ExtInst->insertBefore(SI); |
| 7265 |
ExtInst->setDebugLoc(SI->getDebugLoc()); |
7265 |
ExtInst->setDebugLoc(SI->getDebugLoc()); |
| 7266 |
SI->setCondition(ExtInst); |
7266 |
SI->setCondition(ExtInst); |
| 7267 |
for (auto Case : SI->cases()) { |
7267 |
for (auto Case : SI->cases()) { |
| 7268 |
const APInt &NarrowConst = Case.getCaseValue()->getValue(); |
7268 |
const APInt &NarrowConst = Case.getCaseValue()->getValue(); |
| 7269 |
APInt WideConst = (ExtType == Instruction::ZExt) |
7269 |
APInt WideConst = (ExtType == Instruction::ZExt) |
| 7270 |
? NarrowConst.zext(RegWidth) |
7270 |
? NarrowConst.zext(RegWidth) |
| 7271 |
: NarrowConst.sext(RegWidth); |
7271 |
: NarrowConst.sext(RegWidth); |
| 7272 |
Case.setValue(ConstantInt::get(Context, WideConst)); |
7272 |
Case.setValue(ConstantInt::get(Context, WideConst)); |
| 7273 |
} |
7273 |
} |
| 7274 |
|
7274 |
|
| 7275 |
return true; |
7275 |
return true; |
| 7276 |
} |
7276 |
} |
| 7277 |
|
7277 |
|
| 7278 |
bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) { |
7278 |
bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) { |
| 7279 |
// The SCCP optimization tends to produce code like this: |
7279 |
// The SCCP optimization tends to produce code like this: |
| 7280 |
// switch(x) { case 42: phi(42, ...) } |
7280 |
// switch(x) { case 42: phi(42, ...) } |
| 7281 |
// Materializing the constant for the phi-argument needs instructions; So we |
7281 |
// Materializing the constant for the phi-argument needs instructions; So we |
| 7282 |
// change the code to: |
7282 |
// change the code to: |
| 7283 |
// switch(x) { case 42: phi(x, ...) } |
7283 |
// switch(x) { case 42: phi(x, ...) } |
| 7284 |
|
7284 |
|
| 7285 |
Value *Condition = SI->getCondition(); |
7285 |
Value *Condition = SI->getCondition(); |
| 7286 |
// Avoid endless loop in degenerate case. |
7286 |
// Avoid endless loop in degenerate case. |
| 7287 |
if (isa(*Condition)) |
7287 |
if (isa(*Condition)) |
| 7288 |
return false; |
7288 |
return false; |
| 7289 |
|
7289 |
|
| 7290 |
bool Changed = false; |
7290 |
bool Changed = false; |
| 7291 |
BasicBlock *SwitchBB = SI->getParent(); |
7291 |
BasicBlock *SwitchBB = SI->getParent(); |
| 7292 |
Type *ConditionType = Condition->getType(); |
7292 |
Type *ConditionType = Condition->getType(); |
| 7293 |
|
7293 |
|
| 7294 |
for (const SwitchInst::CaseHandle &Case : SI->cases()) { |
7294 |
for (const SwitchInst::CaseHandle &Case : SI->cases()) { |
| 7295 |
ConstantInt *CaseValue = Case.getCaseValue(); |
7295 |
ConstantInt *CaseValue = Case.getCaseValue(); |
| 7296 |
BasicBlock *CaseBB = Case.getCaseSuccessor(); |
7296 |
BasicBlock *CaseBB = Case.getCaseSuccessor(); |
| 7297 |
// Set to true if we previously checked that `CaseBB` is only reached by |
7297 |
// Set to true if we previously checked that `CaseBB` is only reached by |
| 7298 |
// a single case from this switch. |
7298 |
// a single case from this switch. |
| 7299 |
bool CheckedForSinglePred = false; |
7299 |
bool CheckedForSinglePred = false; |
| 7300 |
for (PHINode &PHI : CaseBB->phis()) { |
7300 |
for (PHINode &PHI : CaseBB->phis()) { |
| 7301 |
Type *PHIType = PHI.getType(); |
7301 |
Type *PHIType = PHI.getType(); |
| 7302 |
// If ZExt is free then we can also catch patterns like this: |
7302 |
// If ZExt is free then we can also catch patterns like this: |
| 7303 |
// switch((i32)x) { case 42: phi((i64)42, ...); } |
7303 |
// switch((i32)x) { case 42: phi((i64)42, ...); } |
| 7304 |
// and replace `(i64)42` with `zext i32 %x to i64`. |
7304 |
// and replace `(i64)42` with `zext i32 %x to i64`. |
| 7305 |
bool TryZExt = |
7305 |
bool TryZExt = |
| 7306 |
PHIType->isIntegerTy() && |
7306 |
PHIType->isIntegerTy() && |
| 7307 |
PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() && |
7307 |
PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() && |
| 7308 |
TLI->isZExtFree(ConditionType, PHIType); |
7308 |
TLI->isZExtFree(ConditionType, PHIType); |
| 7309 |
if (PHIType == ConditionType || TryZExt) { |
7309 |
if (PHIType == ConditionType || TryZExt) { |
| 7310 |
// Set to true to skip this case because of multiple preds. |
7310 |
// Set to true to skip this case because of multiple preds. |
| 7311 |
bool SkipCase = false; |
7311 |
bool SkipCase = false; |
| 7312 |
Value *Replacement = nullptr; |
7312 |
Value *Replacement = nullptr; |
| 7313 |
for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) { |
7313 |
for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) { |
| 7314 |
Value *PHIValue = PHI.getIncomingValue(I); |
7314 |
Value *PHIValue = PHI.getIncomingValue(I); |
| 7315 |
if (PHIValue != CaseValue) { |
7315 |
if (PHIValue != CaseValue) { |
| 7316 |
if (!TryZExt) |
7316 |
if (!TryZExt) |
| 7317 |
continue; |
7317 |
continue; |
| 7318 |
ConstantInt *PHIValueInt = dyn_cast(PHIValue); |
7318 |
ConstantInt *PHIValueInt = dyn_cast(PHIValue); |
| 7319 |
if (!PHIValueInt || |
7319 |
if (!PHIValueInt || |
| 7320 |
PHIValueInt->getValue() != |
7320 |
PHIValueInt->getValue() != |
| 7321 |
CaseValue->getValue().zext(PHIType->getIntegerBitWidth())) |
7321 |
CaseValue->getValue().zext(PHIType->getIntegerBitWidth())) |
| 7322 |
continue; |
7322 |
continue; |
| 7323 |
} |
7323 |
} |
| 7324 |
if (PHI.getIncomingBlock(I) != SwitchBB) |
7324 |
if (PHI.getIncomingBlock(I) != SwitchBB) |
| 7325 |
continue; |
7325 |
continue; |
| 7326 |
// We cannot optimize if there are multiple case labels jumping to |
7326 |
// We cannot optimize if there are multiple case labels jumping to |
| 7327 |
// this block. This check may get expensive when there are many |
7327 |
// this block. This check may get expensive when there are many |
| 7328 |
// case labels so we test for it last. |
7328 |
// case labels so we test for it last. |
| 7329 |
if (!CheckedForSinglePred) { |
7329 |
if (!CheckedForSinglePred) { |
| 7330 |
CheckedForSinglePred = true; |
7330 |
CheckedForSinglePred = true; |
| 7331 |
if (SI->findCaseDest(CaseBB) == nullptr) { |
7331 |
if (SI->findCaseDest(CaseBB) == nullptr) { |
| 7332 |
SkipCase = true; |
7332 |
SkipCase = true; |
| 7333 |
break; |
7333 |
break; |
| 7334 |
} |
7334 |
} |
| 7335 |
} |
7335 |
} |
| 7336 |
|
7336 |
|
| 7337 |
if (Replacement == nullptr) { |
7337 |
if (Replacement == nullptr) { |
| 7338 |
if (PHIValue == CaseValue) { |
7338 |
if (PHIValue == CaseValue) { |
| 7339 |
Replacement = Condition; |
7339 |
Replacement = Condition; |
| 7340 |
} else { |
7340 |
} else { |
| 7341 |
IRBuilder<> Builder(SI); |
7341 |
IRBuilder<> Builder(SI); |
| 7342 |
Replacement = Builder.CreateZExt(Condition, PHIType); |
7342 |
Replacement = Builder.CreateZExt(Condition, PHIType); |
| 7343 |
} |
7343 |
} |
| 7344 |
} |
7344 |
} |
| 7345 |
PHI.setIncomingValue(I, Replacement); |
7345 |
PHI.setIncomingValue(I, Replacement); |
| 7346 |
Changed = true; |
7346 |
Changed = true; |
| 7347 |
} |
7347 |
} |
| 7348 |
if (SkipCase) |
7348 |
if (SkipCase) |
| 7349 |
break; |
7349 |
break; |
| 7350 |
} |
7350 |
} |
| 7351 |
} |
7351 |
} |
| 7352 |
} |
7352 |
} |
| 7353 |
return Changed; |
7353 |
return Changed; |
| 7354 |
} |
7354 |
} |
| 7355 |
|
7355 |
|
| 7356 |
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { |
7356 |
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { |
| 7357 |
bool Changed = optimizeSwitchType(SI); |
7357 |
bool Changed = optimizeSwitchType(SI); |
| 7358 |
Changed |= optimizeSwitchPhiConstants(SI); |
7358 |
Changed |= optimizeSwitchPhiConstants(SI); |
| 7359 |
return Changed; |
7359 |
return Changed; |
| 7360 |
} |
7360 |
} |
| 7361 |
|
7361 |
|
| 7362 |
namespace { |
7362 |
namespace { |
| 7363 |
|
7363 |
|
| 7364 |
/// Helper class to promote a scalar operation to a vector one. |
7364 |
/// Helper class to promote a scalar operation to a vector one. |
| 7365 |
/// This class is used to move downward extractelement transition. |
7365 |
/// This class is used to move downward extractelement transition. |
| 7366 |
/// E.g., |
7366 |
/// E.g., |
| 7367 |
/// a = vector_op <2 x i32> |
7367 |
/// a = vector_op <2 x i32> |
| 7368 |
/// b = extractelement <2 x i32> a, i32 0 |
7368 |
/// b = extractelement <2 x i32> a, i32 0 |
| 7369 |
/// c = scalar_op b |
7369 |
/// c = scalar_op b |
| 7370 |
/// store c |
7370 |
/// store c |
| 7371 |
/// |
7371 |
/// |
| 7372 |
/// => |
7372 |
/// => |
| 7373 |
/// a = vector_op <2 x i32> |
7373 |
/// a = vector_op <2 x i32> |
| 7374 |
/// c = vector_op a (equivalent to scalar_op on the related lane) |
7374 |
/// c = vector_op a (equivalent to scalar_op on the related lane) |
| 7375 |
/// * d = extractelement <2 x i32> c, i32 0 |
7375 |
/// * d = extractelement <2 x i32> c, i32 0 |
| 7376 |
/// * store d |
7376 |
/// * store d |
| 7377 |
/// Assuming both extractelement and store can be combine, we get rid of the |
7377 |
/// Assuming both extractelement and store can be combine, we get rid of the |
| 7378 |
/// transition. |
7378 |
/// transition. |
| 7379 |
class VectorPromoteHelper { |
7379 |
class VectorPromoteHelper { |
| 7380 |
/// DataLayout associated with the current module. |
7380 |
/// DataLayout associated with the current module. |
| 7381 |
const DataLayout &DL; |
7381 |
const DataLayout &DL; |
| 7382 |
|
7382 |
|
| 7383 |
/// Used to perform some checks on the legality of vector operations. |
7383 |
/// Used to perform some checks on the legality of vector operations. |
| 7384 |
const TargetLowering &TLI; |
7384 |
const TargetLowering &TLI; |
| 7385 |
|
7385 |
|
| 7386 |
/// Used to estimated the cost of the promoted chain. |
7386 |
/// Used to estimated the cost of the promoted chain. |
| 7387 |
const TargetTransformInfo &TTI; |
7387 |
const TargetTransformInfo &TTI; |
| 7388 |
|
7388 |
|
| 7389 |
/// The transition being moved downwards. |
7389 |
/// The transition being moved downwards. |
| 7390 |
Instruction *Transition; |
7390 |
Instruction *Transition; |
| 7391 |
|
7391 |
|
| 7392 |
/// The sequence of instructions to be promoted. |
7392 |
/// The sequence of instructions to be promoted. |
| 7393 |
SmallVector InstsToBePromoted; |
7393 |
SmallVector InstsToBePromoted; |
| 7394 |
|
7394 |
|
| 7395 |
/// Cost of combining a store and an extract. |
7395 |
/// Cost of combining a store and an extract. |
| 7396 |
unsigned StoreExtractCombineCost; |
7396 |
unsigned StoreExtractCombineCost; |
| 7397 |
|
7397 |
|
| 7398 |
/// Instruction that will be combined with the transition. |
7398 |
/// Instruction that will be combined with the transition. |
| 7399 |
Instruction *CombineInst = nullptr; |
7399 |
Instruction *CombineInst = nullptr; |
| 7400 |
|
7400 |
|
| 7401 |
/// The instruction that represents the current end of the transition. |
7401 |
/// The instruction that represents the current end of the transition. |
| 7402 |
/// Since we are faking the promotion until we reach the end of the chain |
7402 |
/// Since we are faking the promotion until we reach the end of the chain |
| 7403 |
/// of computation, we need a way to get the current end of the transition. |
7403 |
/// of computation, we need a way to get the current end of the transition. |
| 7404 |
Instruction *getEndOfTransition() const { |
7404 |
Instruction *getEndOfTransition() const { |
| 7405 |
if (InstsToBePromoted.empty()) |
7405 |
if (InstsToBePromoted.empty()) |
| 7406 |
return Transition; |
7406 |
return Transition; |
| 7407 |
return InstsToBePromoted.back(); |
7407 |
return InstsToBePromoted.back(); |
| 7408 |
} |
7408 |
} |
| 7409 |
|
7409 |
|
| 7410 |
/// Return the index of the original value in the transition. |
7410 |
/// Return the index of the original value in the transition. |
| 7411 |
/// E.g., for "extractelement <2 x i32> c, i32 1" the original value, |
7411 |
/// E.g., for "extractelement <2 x i32> c, i32 1" the original value, |
| 7412 |
/// c, is at index 0. |
7412 |
/// c, is at index 0. |
| 7413 |
unsigned getTransitionOriginalValueIdx() const { |
7413 |
unsigned getTransitionOriginalValueIdx() const { |
| 7414 |
assert(isa(Transition) && |
7414 |
assert(isa(Transition) && |
| 7415 |
"Other kind of transitions are not supported yet"); |
7415 |
"Other kind of transitions are not supported yet"); |
| 7416 |
return 0; |
7416 |
return 0; |
| 7417 |
} |
7417 |
} |
| 7418 |
|
7418 |
|
| 7419 |
/// Return the index of the index in the transition. |
7419 |
/// Return the index of the index in the transition. |
| 7420 |
/// E.g., for "extractelement <2 x i32> c, i32 0" the index |
7420 |
/// E.g., for "extractelement <2 x i32> c, i32 0" the index |
| 7421 |
/// is at index 1. |
7421 |
/// is at index 1. |
| 7422 |
unsigned getTransitionIdx() const { |
7422 |
unsigned getTransitionIdx() const { |
| 7423 |
assert(isa(Transition) && |
7423 |
assert(isa(Transition) && |
| 7424 |
"Other kind of transitions are not supported yet"); |
7424 |
"Other kind of transitions are not supported yet"); |
| 7425 |
return 1; |
7425 |
return 1; |
| 7426 |
} |
7426 |
} |
| 7427 |
|
7427 |
|
| 7428 |
/// Get the type of the transition. |
7428 |
/// Get the type of the transition. |
| 7429 |
/// This is the type of the original value. |
7429 |
/// This is the type of the original value. |
| 7430 |
/// E.g., for "extractelement <2 x i32> c, i32 1" the type of the |
7430 |
/// E.g., for "extractelement <2 x i32> c, i32 1" the type of the |
| 7431 |
/// transition is <2 x i32>. |
7431 |
/// transition is <2 x i32>. |
| 7432 |
Type *getTransitionType() const { |
7432 |
Type *getTransitionType() const { |
| 7433 |
return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); |
7433 |
return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); |
| 7434 |
} |
7434 |
} |
| 7435 |
|
7435 |
|
| 7436 |
/// Promote \p ToBePromoted by moving \p Def downward through. |
7436 |
/// Promote \p ToBePromoted by moving \p Def downward through. |
| 7437 |
/// I.e., we have the following sequence: |
7437 |
/// I.e., we have the following sequence: |
| 7438 |
/// Def = Transition a to |
7438 |
/// Def = Transition a to |
| 7439 |
/// b = ToBePromoted Def, ... |
7439 |
/// b = ToBePromoted Def, ... |
| 7440 |
/// => |
7440 |
/// => |
| 7441 |
/// b = ToBePromoted a, ... |
7441 |
/// b = ToBePromoted a, ... |
| 7442 |
/// Def = Transition ToBePromoted to |
7442 |
/// Def = Transition ToBePromoted to |
| 7443 |
void promoteImpl(Instruction *ToBePromoted); |
7443 |
void promoteImpl(Instruction *ToBePromoted); |
| 7444 |
|
7444 |
|
| 7445 |
/// Check whether or not it is profitable to promote all the |
7445 |
/// Check whether or not it is profitable to promote all the |
| 7446 |
/// instructions enqueued to be promoted. |
7446 |
/// instructions enqueued to be promoted. |
| 7447 |
bool isProfitableToPromote() { |
7447 |
bool isProfitableToPromote() { |
| 7448 |
Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); |
7448 |
Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); |
| 7449 |
unsigned Index = isa(ValIdx) |
7449 |
unsigned Index = isa(ValIdx) |
| 7450 |
? cast(ValIdx)->getZExtValue() |
7450 |
? cast(ValIdx)->getZExtValue() |
| 7451 |
: -1; |
7451 |
: -1; |
| 7452 |
Type *PromotedType = getTransitionType(); |
7452 |
Type *PromotedType = getTransitionType(); |
| 7453 |
|
7453 |
|
| 7454 |
StoreInst *ST = cast(CombineInst); |
7454 |
StoreInst *ST = cast(CombineInst); |
| 7455 |
unsigned AS = ST->getPointerAddressSpace(); |
7455 |
unsigned AS = ST->getPointerAddressSpace(); |
| 7456 |
// Check if this store is supported. |
7456 |
// Check if this store is supported. |
| 7457 |
if (!TLI.allowsMisalignedMemoryAccesses( |
7457 |
if (!TLI.allowsMisalignedMemoryAccesses( |
| 7458 |
TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, |
7458 |
TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, |
| 7459 |
ST->getAlign())) { |
7459 |
ST->getAlign())) { |
| 7460 |
// If this is not supported, there is no way we can combine |
7460 |
// If this is not supported, there is no way we can combine |
| 7461 |
// the extract with the store. |
7461 |
// the extract with the store. |
| 7462 |
return false; |
7462 |
return false; |
| 7463 |
} |
7463 |
} |
| 7464 |
|
7464 |
|
| 7465 |
// The scalar chain of computation has to pay for the transition |
7465 |
// The scalar chain of computation has to pay for the transition |
| 7466 |
// scalar to vector. |
7466 |
// scalar to vector. |
| 7467 |
// The vector chain has to account for the combining cost. |
7467 |
// The vector chain has to account for the combining cost. |
| 7468 |
enum TargetTransformInfo::TargetCostKind CostKind = |
7468 |
enum TargetTransformInfo::TargetCostKind CostKind = |
| 7469 |
TargetTransformInfo::TCK_RecipThroughput; |
7469 |
TargetTransformInfo::TCK_RecipThroughput; |
| 7470 |
InstructionCost ScalarCost = |
7470 |
InstructionCost ScalarCost = |
| 7471 |
TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index); |
7471 |
TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index); |
| 7472 |
InstructionCost VectorCost = StoreExtractCombineCost; |
7472 |
InstructionCost VectorCost = StoreExtractCombineCost; |
| 7473 |
for (const auto &Inst : InstsToBePromoted) { |
7473 |
for (const auto &Inst : InstsToBePromoted) { |
| 7474 |
// Compute the cost. |
7474 |
// Compute the cost. |
| 7475 |
// By construction, all instructions being promoted are arithmetic ones. |
7475 |
// By construction, all instructions being promoted are arithmetic ones. |
| 7476 |
// Moreover, one argument is a constant that can be viewed as a splat |
7476 |
// Moreover, one argument is a constant that can be viewed as a splat |
| 7477 |
// constant. |
7477 |
// constant. |
| 7478 |
Value *Arg0 = Inst->getOperand(0); |
7478 |
Value *Arg0 = Inst->getOperand(0); |
| 7479 |
bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || |
7479 |
bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || |
| 7480 |
isa<ConstantFP>(Arg0); |
7480 |
isa<ConstantFP>(Arg0); |
| 7481 |
TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info; |
7481 |
TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info; |
| 7482 |
if (IsArg0Constant) |
7482 |
if (IsArg0Constant) |
| 7483 |
Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue; |
7483 |
Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue; |
| 7484 |
else |
7484 |
else |
| 7485 |
Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue; |
7485 |
Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue; |
| 7486 |
|
7486 |
|
| 7487 |
ScalarCost += TTI.getArithmeticInstrCost( |
7487 |
ScalarCost += TTI.getArithmeticInstrCost( |
| 7488 |
Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info); |
7488 |
Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info); |
| 7489 |
VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, |
7489 |
VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, |
| 7490 |
CostKind, Arg0Info, Arg1Info); |
7490 |
CostKind, Arg0Info, Arg1Info); |
| 7491 |
} |
7491 |
} |
| 7492 |
LLVM_DEBUG( |
7492 |
LLVM_DEBUG( |
| 7493 |
dbgs() << "Estimated cost of computation to be promoted:\nScalar: " |
7493 |
dbgs() << "Estimated cost of computation to be promoted:\nScalar: " |
| 7494 |
<< ScalarCost << "\nVector: " << VectorCost << '\n'); |
7494 |
<< ScalarCost << "\nVector: " << VectorCost << '\n'); |
| 7495 |
return ScalarCost > VectorCost; |
7495 |
return ScalarCost > VectorCost; |
| 7496 |
} |
7496 |
} |
| 7497 |
|
7497 |
|
| 7498 |
/// Generate a constant vector with \p Val with the same |
7498 |
/// Generate a constant vector with \p Val with the same |
| 7499 |
/// number of elements as the transition. |
7499 |
/// number of elements as the transition. |
| 7500 |
/// \p UseSplat defines whether or not \p Val should be replicated |
7500 |
/// \p UseSplat defines whether or not \p Val should be replicated |
| 7501 |
/// across the whole vector. |
7501 |
/// across the whole vector. |
| 7502 |
/// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, |
7502 |
/// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, |
| 7503 |
/// otherwise we generate a vector with as many undef as possible: |
7503 |
/// otherwise we generate a vector with as many undef as possible: |
| 7504 |
/// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only |
7504 |
/// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only |
| 7505 |
/// used at the index of the extract. |
7505 |
/// used at the index of the extract. |
| 7506 |
Value *getConstantVector(Constant *Val, bool UseSplat) const { |
7506 |
Value *getConstantVector(Constant *Val, bool UseSplat) const { |
| 7507 |
unsigned ExtractIdx = std::numeric_limits<unsigned>::max(); |
7507 |
unsigned ExtractIdx = std::numeric_limits<unsigned>::max(); |
| 7508 |
if (!UseSplat) { |
7508 |
if (!UseSplat) { |
| 7509 |
// If we cannot determine where the constant must be, we have to |
7509 |
// If we cannot determine where the constant must be, we have to |
| 7510 |
// use a splat constant. |
7510 |
// use a splat constant. |
| 7511 |
Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); |
7511 |
Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); |
| 7512 |
if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) |
7512 |
if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) |
| 7513 |
ExtractIdx = CstVal->getSExtValue(); |
7513 |
ExtractIdx = CstVal->getSExtValue(); |
| 7514 |
else |
7514 |
else |
| 7515 |
UseSplat = true; |
7515 |
UseSplat = true; |
| 7516 |
} |
7516 |
} |
| 7517 |
|
7517 |
|
| 7518 |
ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount(); |
7518 |
ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount(); |
| 7519 |
if (UseSplat) |
7519 |
if (UseSplat) |
| 7520 |
return ConstantVector::getSplat(EC, Val); |
7520 |
return ConstantVector::getSplat(EC, Val); |
| 7521 |
|
7521 |
|
| 7522 |
if (!EC.isScalable()) { |
7522 |
if (!EC.isScalable()) { |
| 7523 |
SmallVector<Constant *, 4> ConstVec; |
7523 |
SmallVector<Constant *, 4> ConstVec; |
| 7524 |
UndefValue *UndefVal = UndefValue::get(Val->getType()); |
7524 |
UndefValue *UndefVal = UndefValue::get(Val->getType()); |
| 7525 |
for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) { |
7525 |
for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) { |
| 7526 |
if (Idx == ExtractIdx) |
7526 |
if (Idx == ExtractIdx) |
| 7527 |
ConstVec.push_back(Val); |
7527 |
ConstVec.push_back(Val); |
| 7528 |
else |
7528 |
else |
| 7529 |
ConstVec.push_back(UndefVal); |
7529 |
ConstVec.push_back(UndefVal); |
| 7530 |
} |
7530 |
} |
| 7531 |
return ConstantVector::get(ConstVec); |
7531 |
return ConstantVector::get(ConstVec); |
| 7532 |
} else |
7532 |
} else |
| 7533 |
llvm_unreachable( |
7533 |
llvm_unreachable( |
| 7534 |
"Generate scalable vector for non-splat is unimplemented"); |
7534 |
"Generate scalable vector for non-splat is unimplemented"); |
| 7535 |
} |
7535 |
} |
| 7536 |
|
7536 |
|
| 7537 |
/// Check if promoting to a vector type an operand at \p OperandIdx |
7537 |
/// Check if promoting to a vector type an operand at \p OperandIdx |
| 7538 |
/// in \p Use can trigger undefined behavior. |
7538 |
/// in \p Use can trigger undefined behavior. |
| 7539 |
static bool canCauseUndefinedBehavior(const Instruction *Use, |
7539 |
static bool canCauseUndefinedBehavior(const Instruction *Use, |
| 7540 |
unsigned OperandIdx) { |
7540 |
unsigned OperandIdx) { |
| 7541 |
// This is not safe to introduce undef when the operand is on |
7541 |
// This is not safe to introduce undef when the operand is on |
| 7542 |
// the right hand side of a division-like instruction. |
7542 |
// the right hand side of a division-like instruction. |
| 7543 |
if (OperandIdx != 1) |
7543 |
if (OperandIdx != 1) |
| 7544 |
return false; |
7544 |
return false; |
| 7545 |
switch (Use->getOpcode()) { |
7545 |
switch (Use->getOpcode()) { |
| 7546 |
default: |
7546 |
default: |
| 7547 |
return false; |
7547 |
return false; |
| 7548 |
case Instruction::SDiv: |
7548 |
case Instruction::SDiv: |
| 7549 |
case Instruction::UDiv: |
7549 |
case Instruction::UDiv: |
| 7550 |
case Instruction::SRem: |
7550 |
case Instruction::SRem: |
| 7551 |
case Instruction::URem: |
7551 |
case Instruction::URem: |
| 7552 |
return true; |
7552 |
return true; |
| 7553 |
case Instruction::FDiv: |
7553 |
case Instruction::FDiv: |
| 7554 |
case Instruction::FRem: |
7554 |
case Instruction::FRem: |
| 7555 |
return !Use->hasNoNaNs(); |
7555 |
return !Use->hasNoNaNs(); |
| 7556 |
} |
7556 |
} |
| 7557 |
llvm_unreachable(nullptr); |
7557 |
llvm_unreachable(nullptr); |
| 7558 |
} |
7558 |
} |
| 7559 |
|
7559 |
|
| 7560 |
public: |
7560 |
public: |
| 7561 |
VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, |
7561 |
VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, |
| 7562 |
const TargetTransformInfo &TTI, Instruction *Transition, |
7562 |
const TargetTransformInfo &TTI, Instruction *Transition, |
| 7563 |
unsigned CombineCost) |
7563 |
unsigned CombineCost) |
| 7564 |
: DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), |
7564 |
: DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), |
| 7565 |
StoreExtractCombineCost(CombineCost) { |
7565 |
StoreExtractCombineCost(CombineCost) { |
| 7566 |
assert(Transition && "Do not know how to promote null"); |
7566 |
assert(Transition && "Do not know how to promote null"); |
| 7567 |
} |
7567 |
} |
| 7568 |
|
7568 |
|
| 7569 |
/// Check if we can promote \p ToBePromoted to \p Type. |
7569 |
/// Check if we can promote \p ToBePromoted to \p Type. |
| 7570 |
bool canPromote(const Instruction *ToBePromoted) const { |
7570 |
bool canPromote(const Instruction *ToBePromoted) const { |
| 7571 |
// We could support CastInst too. |
7571 |
// We could support CastInst too. |
| 7572 |
return isa<BinaryOperator>(ToBePromoted); |
7572 |
return isa<BinaryOperator>(ToBePromoted); |
| 7573 |
} |
7573 |
} |
| 7574 |
|
7574 |
|
| 7575 |
/// Check if it is profitable to promote \p ToBePromoted |
7575 |
/// Check if it is profitable to promote \p ToBePromoted |
| 7576 |
/// by moving downward the transition through. |
7576 |
/// by moving downward the transition through. |
| 7577 |
bool shouldPromote(const Instruction *ToBePromoted) const { |
7577 |
bool shouldPromote(const Instruction *ToBePromoted) const { |
| 7578 |
// Promote only if all the operands can be statically expanded. |
7578 |
// Promote only if all the operands can be statically expanded. |
| 7579 |
// Indeed, we do not want to introduce any new kind of transitions. |
7579 |
// Indeed, we do not want to introduce any new kind of transitions. |
| 7580 |
for (const Use &U : ToBePromoted->operands()) { |
7580 |
for (const Use &U : ToBePromoted->operands()) { |
| 7581 |
const Value *Val = U.get(); |
7581 |
const Value *Val = U.get(); |
| 7582 |
if (Val == getEndOfTransition()) { |
7582 |
if (Val == getEndOfTransition()) { |
| 7583 |
// If the use is a division and the transition is on the rhs, |
7583 |
// If the use is a division and the transition is on the rhs, |
| 7584 |
// we cannot promote the operation, otherwise we may create a |
7584 |
// we cannot promote the operation, otherwise we may create a |
| 7585 |
// division by zero. |
7585 |
// division by zero. |
| 7586 |
if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) |
7586 |
if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) |
| 7587 |
return false; |
7587 |
return false; |
| 7588 |
continue; |
7588 |
continue; |
| 7589 |
} |
7589 |
} |
| 7590 |
if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && |
7590 |
if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && |
| 7591 |
!isa<ConstantFP>(Val)) |
7591 |
!isa<ConstantFP>(Val)) |
| 7592 |
return false; |
7592 |
return false; |
| 7593 |
} |
7593 |
} |
| 7594 |
// Check that the resulting operation is legal. |
7594 |
// Check that the resulting operation is legal. |
| 7595 |
int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); |
7595 |
int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); |
| 7596 |
if (!ISDOpcode) |
7596 |
if (!ISDOpcode) |
| 7597 |
return false; |
7597 |
return false; |
| 7598 |
return StressStoreExtract || |
7598 |
return StressStoreExtract || |
| 7599 |
TLI.isOperationLegalOrCustom( |
7599 |
TLI.isOperationLegalOrCustom( |
| 7600 |
ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); |
7600 |
ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); |
| 7601 |
} |
7601 |
} |
| 7602 |
|
7602 |
|
| 7603 |
/// Check whether or not \p Use can be combined |
7603 |
/// Check whether or not \p Use can be combined |
| 7604 |
/// with the transition. |
7604 |
/// with the transition. |
| 7605 |
/// I.e., is it possible to do Use(Transition) => AnotherUse? |
7605 |
/// I.e., is it possible to do Use(Transition) => AnotherUse? |
| 7606 |
bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } |
7606 |
bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } |
| 7607 |
|
7607 |
|
| 7608 |
/// Record \p ToBePromoted as part of the chain to be promoted. |
7608 |
/// Record \p ToBePromoted as part of the chain to be promoted. |
| 7609 |
void enqueueForPromotion(Instruction *ToBePromoted) { |
7609 |
void enqueueForPromotion(Instruction *ToBePromoted) { |
| 7610 |
InstsToBePromoted.push_back(ToBePromoted); |
7610 |
InstsToBePromoted.push_back(ToBePromoted); |
| 7611 |
} |
7611 |
} |
| 7612 |
|
7612 |
|
| 7613 |
/// Set the instruction that will be combined with the transition. |
7613 |
/// Set the instruction that will be combined with the transition. |
| 7614 |
void recordCombineInstruction(Instruction *ToBeCombined) { |
7614 |
void recordCombineInstruction(Instruction *ToBeCombined) { |
| 7615 |
assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); |
7615 |
assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); |
| 7616 |
CombineInst = ToBeCombined; |
7616 |
CombineInst = ToBeCombined; |
| 7617 |
} |
7617 |
} |
| 7618 |
|
7618 |
|
| 7619 |
/// Promote all the instructions enqueued for promotion if it is |
7619 |
/// Promote all the instructions enqueued for promotion if it is |
| 7620 |
/// profitable. |
7620 |
/// profitable. |
| 7621 |
/// \return True if the promotion happened, false otherwise. |
7621 |
/// \return True if the promotion happened, false otherwise. |
| 7622 |
bool promote() { |
7622 |
bool promote() { |
| 7623 |
// Check if there is something to promote. |
7623 |
// Check if there is something to promote. |
| 7624 |
// Right now, if we do not have anything to combine with, |
7624 |
// Right now, if we do not have anything to combine with, |
| 7625 |
// we assume the promotion is not profitable. |
7625 |
// we assume the promotion is not profitable. |
| 7626 |
if (InstsToBePromoted.empty() || !CombineInst) |
7626 |
if (InstsToBePromoted.empty() || !CombineInst) |
| 7627 |
return false; |
7627 |
return false; |
| 7628 |
|
7628 |
|
| 7629 |
// Check cost. |
7629 |
// Check cost. |
| 7630 |
if (!StressStoreExtract && !isProfitableToPromote()) |
7630 |
if (!StressStoreExtract && !isProfitableToPromote()) |
| 7631 |
return false; |
7631 |
return false; |
| 7632 |
|
7632 |
|
| 7633 |
// Promote. |
7633 |
// Promote. |
| 7634 |
for (auto &ToBePromoted : InstsToBePromoted) |
7634 |
for (auto &ToBePromoted : InstsToBePromoted) |
| 7635 |
promoteImpl(ToBePromoted); |
7635 |
promoteImpl(ToBePromoted); |
| 7636 |
InstsToBePromoted.clear(); |
7636 |
InstsToBePromoted.clear(); |
| 7637 |
return true; |
7637 |
return true; |
| 7638 |
} |
7638 |
} |
| 7639 |
}; |
7639 |
}; |
| 7640 |
|
7640 |
|
| 7641 |
} // end anonymous namespace |
7641 |
} // end anonymous namespace |
| 7642 |
|
7642 |
|
| 7643 |
void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { |
7643 |
void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { |
| 7644 |
// At this point, we know that all the operands of ToBePromoted but Def |
7644 |
// At this point, we know that all the operands of ToBePromoted but Def |
| 7645 |
// can be statically promoted. |
7645 |
// can be statically promoted. |
| 7646 |
// For Def, we need to use its parameter in ToBePromoted: |
7646 |
// For Def, we need to use its parameter in ToBePromoted: |
| 7647 |
// b = ToBePromoted ty1 a |
7647 |
// b = ToBePromoted ty1 a |
| 7648 |
// Def = Transition ty1 b to ty2 |
7648 |
// Def = Transition ty1 b to ty2 |
| 7649 |
// Move the transition down. |
7649 |
// Move the transition down. |
| 7650 |
// 1. Replace all uses of the promoted operation by the transition. |
7650 |
// 1. Replace all uses of the promoted operation by the transition. |
| 7651 |
// = ... b => = ... Def. |
7651 |
// = ... b => = ... Def. |
| 7652 |
assert(ToBePromoted->getType() == Transition->getType() && |
7652 |
assert(ToBePromoted->getType() == Transition->getType() && |
| 7653 |
"The type of the result of the transition does not match " |
7653 |
"The type of the result of the transition does not match " |
| 7654 |
"the final type"); |
7654 |
"the final type"); |
| 7655 |
ToBePromoted->replaceAllUsesWith(Transition); |
7655 |
ToBePromoted->replaceAllUsesWith(Transition); |
| 7656 |
// 2. Update the type of the uses. |
7656 |
// 2. Update the type of the uses. |
| 7657 |
// b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. |
7657 |
// b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. |
| 7658 |
Type *TransitionTy = getTransitionType(); |
7658 |
Type *TransitionTy = getTransitionType(); |
| 7659 |
ToBePromoted->mutateType(TransitionTy); |
7659 |
ToBePromoted->mutateType(TransitionTy); |
| 7660 |
// 3. Update all the operands of the promoted operation with promoted |
7660 |
// 3. Update all the operands of the promoted operation with promoted |
| 7661 |
// operands. |
7661 |
// operands. |
| 7662 |
// b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. |
7662 |
// b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. |
| 7663 |
for (Use &U : ToBePromoted->operands()) { |
7663 |
for (Use &U : ToBePromoted->operands()) { |
| 7664 |
Value *Val = U.get(); |
7664 |
Value *Val = U.get(); |
| 7665 |
Value *NewVal = nullptr; |
7665 |
Value *NewVal = nullptr; |
| 7666 |
if (Val == Transition) |
7666 |
if (Val == Transition) |
| 7667 |
NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); |
7667 |
NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); |
| 7668 |
else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || |
7668 |
else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || |
| 7669 |
isa<ConstantFP>(Val)) { |
7669 |
isa<ConstantFP>(Val)) { |
| 7670 |
// Use a splat constant if it is not safe to use undef. |
7670 |
// Use a splat constant if it is not safe to use undef. |
| 7671 |
NewVal = getConstantVector( |
7671 |
NewVal = getConstantVector( |
| 7672 |
cast<Constant>(Val), |
7672 |
cast<Constant>(Val), |
| 7673 |
isa<UndefValue>(Val) || |
7673 |
isa<UndefValue>(Val) || |
| 7674 |
canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); |
7674 |
canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); |
| 7675 |
} else |
7675 |
} else |
| 7676 |
llvm_unreachable("Did you modified shouldPromote and forgot to update " |
7676 |
llvm_unreachable("Did you modified shouldPromote and forgot to update " |
| 7677 |
"this?"); |
7677 |
"this?"); |
| 7678 |
ToBePromoted->setOperand(U.getOperandNo(), NewVal); |
7678 |
ToBePromoted->setOperand(U.getOperandNo(), NewVal); |
| 7679 |
} |
7679 |
} |
| 7680 |
Transition->moveAfter(ToBePromoted); |
7680 |
Transition->moveAfter(ToBePromoted); |
| 7681 |
Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); |
7681 |
Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); |
| 7682 |
} |
7682 |
} |
| 7683 |
|
7683 |
|
| 7684 |
/// Some targets can do store(extractelement) with one instruction. |
7684 |
/// Some targets can do store(extractelement) with one instruction. |
| 7685 |
/// Try to push the extractelement towards the stores when the target |
7685 |
/// Try to push the extractelement towards the stores when the target |
| 7686 |
/// has this feature and this is profitable. |
7686 |
/// has this feature and this is profitable. |
| 7687 |
bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { |
7687 |
bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { |
| 7688 |
unsigned CombineCost = std::numeric_limits<unsigned>::max(); |
7688 |
unsigned CombineCost = std::numeric_limits<unsigned>::max(); |
| 7689 |
if (DisableStoreExtract || |
7689 |
if (DisableStoreExtract || |
| 7690 |
(!StressStoreExtract && |
7690 |
(!StressStoreExtract && |
| 7691 |
!TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), |
7691 |
!TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), |
| 7692 |
Inst->getOperand(1), CombineCost))) |
7692 |
Inst->getOperand(1), CombineCost))) |
| 7693 |
return false; |
7693 |
return false; |
| 7694 |
|
7694 |
|
| 7695 |
// At this point we know that Inst is a vector to scalar transition. |
7695 |
// At this point we know that Inst is a vector to scalar transition. |
| 7696 |
// Try to move it down the def-use chain, until: |
7696 |
// Try to move it down the def-use chain, until: |
| 7697 |
// - We can combine the transition with its single use |
7697 |
// - We can combine the transition with its single use |
| 7698 |
// => we got rid of the transition. |
7698 |
// => we got rid of the transition. |
| 7699 |
// - We escape the current basic block |
7699 |
// - We escape the current basic block |
| 7700 |
// => we would need to check that we are moving it at a cheaper place and |
7700 |
// => we would need to check that we are moving it at a cheaper place and |
| 7701 |
// we do not do that for now. |
7701 |
// we do not do that for now. |
| 7702 |
BasicBlock *Parent = Inst->getParent(); |
7702 |
BasicBlock *Parent = Inst->getParent(); |
| 7703 |
LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); |
7703 |
LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); |
| 7704 |
VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); |
7704 |
VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); |
| 7705 |
// If the transition has more than one use, assume this is not going to be |
7705 |
// If the transition has more than one use, assume this is not going to be |
| 7706 |
// beneficial. |
7706 |
// beneficial. |
| 7707 |
while (Inst->hasOneUse()) { |
7707 |
while (Inst->hasOneUse()) { |
| 7708 |
Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); |
7708 |
Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); |
| 7709 |
LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); |
7709 |
LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); |
| 7710 |
|
7710 |
|
| 7711 |
if (ToBePromoted->getParent() != Parent) { |
7711 |
if (ToBePromoted->getParent() != Parent) { |
| 7712 |
LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block (" |
7712 |
LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block (" |
| 7713 |
<< ToBePromoted->getParent()->getName() |
7713 |
<< ToBePromoted->getParent()->getName() |
| 7714 |
<< ") than the transition (" << Parent->getName() |
7714 |
<< ") than the transition (" << Parent->getName() |
| 7715 |
<< ").\n"); |
7715 |
<< ").\n"); |
| 7716 |
return false; |
7716 |
return false; |
| 7717 |
} |
7717 |
} |
| 7718 |
|
7718 |
|
| 7719 |
if (VPH.canCombine(ToBePromoted)) { |
7719 |
if (VPH.canCombine(ToBePromoted)) { |
| 7720 |
LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n' |
7720 |
LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n' |
| 7721 |
<< "will be combined with: " << *ToBePromoted << '\n'); |
7721 |
<< "will be combined with: " << *ToBePromoted << '\n'); |
| 7722 |
VPH.recordCombineInstruction(ToBePromoted); |
7722 |
VPH.recordCombineInstruction(ToBePromoted); |
| 7723 |
bool Changed = VPH.promote(); |
7723 |
bool Changed = VPH.promote(); |
| 7724 |
NumStoreExtractExposed += Changed; |
7724 |
NumStoreExtractExposed += Changed; |
| 7725 |
return Changed; |
7725 |
return Changed; |
| 7726 |
} |
7726 |
} |
| 7727 |
|
7727 |
|
| 7728 |
LLVM_DEBUG(dbgs() << "Try promoting.\n"); |
7728 |
LLVM_DEBUG(dbgs() << "Try promoting.\n"); |
| 7729 |
if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) |
7729 |
if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) |
| 7730 |
return false; |
7730 |
return false; |
| 7731 |
|
7731 |
|
| 7732 |
LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); |
7732 |
LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); |
| 7733 |
|
7733 |
|
| 7734 |
VPH.enqueueForPromotion(ToBePromoted); |
7734 |
VPH.enqueueForPromotion(ToBePromoted); |
| 7735 |
Inst = ToBePromoted; |
7735 |
Inst = ToBePromoted; |
| 7736 |
} |
7736 |
} |
| 7737 |
return false; |
7737 |
return false; |
| 7738 |
} |
7738 |
} |
| 7739 |
|
7739 |
|
| 7740 |
/// For the instruction sequence of store below, F and I values |
7740 |
/// For the instruction sequence of store below, F and I values |
| 7741 |
/// are bundled together as an i64 value before being stored into memory. |
7741 |
/// are bundled together as an i64 value before being stored into memory. |
| 7742 |
/// Sometimes it is more efficient to generate separate stores for F and I, |
7742 |
/// Sometimes it is more efficient to generate separate stores for F and I, |
| 7743 |
/// which can remove the bitwise instructions or sink them to colder places. |
7743 |
/// which can remove the bitwise instructions or sink them to colder places. |
| 7744 |
/// |
7744 |
/// |
| 7745 |
/// (store (or (zext (bitcast F to i32) to i64), |
7745 |
/// (store (or (zext (bitcast F to i32) to i64), |
| 7746 |
/// (shl (zext I to i64), 32)), addr) --> |
7746 |
/// (shl (zext I to i64), 32)), addr) --> |
| 7747 |
/// (store F, addr) and (store I, addr+4) |
7747 |
/// (store F, addr) and (store I, addr+4) |
| 7748 |
/// |
7748 |
/// |
| 7749 |
/// Similarly, splitting for other merged store can also be beneficial, like: |
7749 |
/// Similarly, splitting for other merged store can also be beneficial, like: |
| 7750 |
/// For pair of {i32, i32}, i64 store --> two i32 stores. |
7750 |
/// For pair of {i32, i32}, i64 store --> two i32 stores. |
| 7751 |
/// For pair of {i32, i16}, i64 store --> two i32 stores. |
7751 |
/// For pair of {i32, i16}, i64 store --> two i32 stores. |
| 7752 |
/// For pair of {i16, i16}, i32 store --> two i16 stores. |
7752 |
/// For pair of {i16, i16}, i32 store --> two i16 stores. |
| 7753 |
/// For pair of {i16, i8}, i32 store --> two i16 stores. |
7753 |
/// For pair of {i16, i8}, i32 store --> two i16 stores. |
| 7754 |
/// For pair of {i8, i8}, i16 store --> two i8 stores. |
7754 |
/// For pair of {i8, i8}, i16 store --> two i8 stores. |
| 7755 |
/// |
7755 |
/// |
| 7756 |
/// We allow each target to determine specifically which kind of splitting is |
7756 |
/// We allow each target to determine specifically which kind of splitting is |
| 7757 |
/// supported. |
7757 |
/// supported. |
| 7758 |
/// |
7758 |
/// |
| 7759 |
/// The store patterns are commonly seen from the simple code snippet below |
7759 |
/// The store patterns are commonly seen from the simple code snippet below |
| 7760 |
/// if only std::make_pair(...) is sroa transformed before inlined into hoo. |
7760 |
/// if only std::make_pair(...) is sroa transformed before inlined into hoo. |
| 7761 |
/// void goo(const std::pair<int, float> &); |
7761 |
/// void goo(const std::pair<int, float> &); |
| 7762 |
/// hoo() { |
7762 |
/// hoo() { |
| 7763 |
/// ... |
7763 |
/// ... |
| 7764 |
/// goo(std::make_pair(tmp, ftmp)); |
7764 |
/// goo(std::make_pair(tmp, ftmp)); |
| 7765 |
/// ... |
7765 |
/// ... |
| 7766 |
/// } |
7766 |
/// } |
| 7767 |
/// |
7767 |
/// |
| 7768 |
/// Although we already have similar splitting in DAG Combine, we duplicate |
7768 |
/// Although we already have similar splitting in DAG Combine, we duplicate |
| 7769 |
/// it in CodeGenPrepare to catch the case in which pattern is across |
7769 |
/// it in CodeGenPrepare to catch the case in which pattern is across |
| 7770 |
/// multiple BBs. The logic in DAG Combine is kept to catch case generated |
7770 |
/// multiple BBs. The logic in DAG Combine is kept to catch case generated |
| 7771 |
/// during code expansion. |
7771 |
/// during code expansion. |
| 7772 |
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, |
7772 |
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, |
| 7773 |
const TargetLowering &TLI) { |
7773 |
const TargetLowering &TLI) { |
| 7774 |
// Handle simple but common cases only. |
7774 |
// Handle simple but common cases only. |
| 7775 |
Type *StoreType = SI.getValueOperand()->getType(); |
7775 |
Type *StoreType = SI.getValueOperand()->getType(); |
| 7776 |
|
7776 |
|
| 7777 |
// The code below assumes shifting a value by <number of bits>, |
7777 |
// The code below assumes shifting a value by <number of bits>, |
| 7778 |
// whereas scalable vectors would have to be shifted by |
7778 |
// whereas scalable vectors would have to be shifted by |
| 7779 |
// <2log(vscale) + number of bits> in order to store the |
7779 |
// <2log(vscale) + number of bits> in order to store the |
| 7780 |
// low/high parts. Bailing out for now. |
7780 |
// low/high parts. Bailing out for now. |
| 7781 |
if (StoreType->isScalableTy()) |
7781 |
if (StoreType->isScalableTy()) |
| 7782 |
return false; |
7782 |
return false; |
| 7783 |
|
7783 |
|
| 7784 |
if (!DL.typeSizeEqualsStoreSize(StoreType) || |
7784 |
if (!DL.typeSizeEqualsStoreSize(StoreType) || |
| 7785 |
DL.getTypeSizeInBits(StoreType) == 0) |
7785 |
DL.getTypeSizeInBits(StoreType) == 0) |
| 7786 |
return false; |
7786 |
return false; |
| 7787 |
|
7787 |
|
| 7788 |
unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; |
7788 |
unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; |
| 7789 |
Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); |
7789 |
Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); |
| 7790 |
if (!DL.typeSizeEqualsStoreSize(SplitStoreType)) |
7790 |
if (!DL.typeSizeEqualsStoreSize(SplitStoreType)) |
| 7791 |
return false; |
7791 |
return false; |
| 7792 |
|
7792 |
|
| 7793 |
// Don't split the store if it is volatile. |
7793 |
// Don't split the store if it is volatile. |
| 7794 |
if (SI.isVolatile()) |
7794 |
if (SI.isVolatile()) |
| 7795 |
return false; |
7795 |
return false; |
| 7796 |
|
7796 |
|
| 7797 |
// Match the following patterns: |
7797 |
// Match the following patterns: |
| 7798 |
// (store (or (zext LValue to i64), |
7798 |
// (store (or (zext LValue to i64), |
| 7799 |
// (shl (zext HValue to i64), 32)), HalfValBitSize) |
7799 |
// (shl (zext HValue to i64), 32)), HalfValBitSize) |
| 7800 |
// or |
7800 |
// or |
| 7801 |
// (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) |
7801 |
// (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) |
| 7802 |
// (zext LValue to i64), |
7802 |
// (zext LValue to i64), |
| 7803 |
// Expect both operands of OR and the first operand of SHL have only |
7803 |
// Expect both operands of OR and the first operand of SHL have only |
| 7804 |
// one use. |
7804 |
// one use. |
| 7805 |
Value *LValue, *HValue; |
7805 |
Value *LValue, *HValue; |
| 7806 |
if (!match(SI.getValueOperand(), |
7806 |
if (!match(SI.getValueOperand(), |
| 7807 |
m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), |
7807 |
m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), |
| 7808 |
m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), |
7808 |
m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), |
| 7809 |
m_SpecificInt(HalfValBitSize)))))) |
7809 |
m_SpecificInt(HalfValBitSize)))))) |
| 7810 |
return false; |
7810 |
return false; |
| 7811 |
|
7811 |
|
| 7812 |
// Check LValue and HValue are int with size less or equal than 32. |
7812 |
// Check LValue and HValue are int with size less or equal than 32. |
| 7813 |
if (!LValue->getType()->isIntegerTy() || |
7813 |
if (!LValue->getType()->isIntegerTy() || |
| 7814 |
DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || |
7814 |
DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || |
| 7815 |
!HValue->getType()->isIntegerTy() || |
7815 |
!HValue->getType()->isIntegerTy() || |
| 7816 |
DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) |
7816 |
DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) |
| 7817 |
return false; |
7817 |
return false; |
| 7818 |
|
7818 |
|
| 7819 |
// If LValue/HValue is a bitcast instruction, use the EVT before bitcast |
7819 |
// If LValue/HValue is a bitcast instruction, use the EVT before bitcast |
| 7820 |
// as the input of target query. |
7820 |
// as the input of target query. |
| 7821 |
auto *LBC = dyn_cast<BitCastInst>(LValue); |
7821 |
auto *LBC = dyn_cast<BitCastInst>(LValue); |
| 7822 |
auto *HBC = dyn_cast<BitCastInst>(HValue); |
7822 |
auto *HBC = dyn_cast<BitCastInst>(HValue); |
| 7823 |
EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) |
7823 |
EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) |
| 7824 |
: EVT::getEVT(LValue->getType()); |
7824 |
: EVT::getEVT(LValue->getType()); |
| 7825 |
EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType()) |
7825 |
EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType()) |
| 7826 |
: EVT::getEVT(HValue->getType()); |
7826 |
: EVT::getEVT(HValue->getType()); |
| 7827 |
if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) |
7827 |
if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) |
| 7828 |
return false; |
7828 |
return false; |
| 7829 |
|
7829 |
|
| 7830 |
// Start to split store. |
7830 |
// Start to split store. |
| 7831 |
IRBuilder<> Builder(SI.getContext()); |
7831 |
IRBuilder<> Builder(SI.getContext()); |
| 7832 |
Builder.SetInsertPoint(&SI); |
7832 |
Builder.SetInsertPoint(&SI); |
| 7833 |
|
7833 |
|
| 7834 |
// If LValue/HValue is a bitcast in another BB, create a new one in current |
7834 |
// If LValue/HValue is a bitcast in another BB, create a new one in current |
| 7835 |
// BB so it may be merged with the splitted stores by dag combiner. |
7835 |
// BB so it may be merged with the splitted stores by dag combiner. |
| 7836 |
if (LBC && LBC->getParent() != SI.getParent()) |
7836 |
if (LBC && LBC->getParent() != SI.getParent()) |
| 7837 |
LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); |
7837 |
LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); |
| 7838 |
if (HBC && HBC->getParent() != SI.getParent()) |
7838 |
if (HBC && HBC->getParent() != SI.getParent()) |
| 7839 |
HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); |
7839 |
HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); |
| 7840 |
|
7840 |
|
| 7841 |
bool IsLE = SI.getModule()->getDataLayout().isLittleEndian(); |
7841 |
bool IsLE = SI.getModule()->getDataLayout().isLittleEndian(); |
| 7842 |
auto CreateSplitStore = [&](Value *V, bool Upper) { |
7842 |
auto CreateSplitStore = [&](Value *V, bool Upper) { |
| 7843 |
V = Builder.CreateZExtOrBitCast(V, SplitStoreType); |
7843 |
V = Builder.CreateZExtOrBitCast(V, SplitStoreType); |
| 7844 |
Value *Addr = Builder.CreateBitCast( |
7844 |
Value *Addr = Builder.CreateBitCast( |
| 7845 |
SI.getOperand(1), |
7845 |
SI.getOperand(1), |
| 7846 |
SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); |
7846 |
SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); |
| 7847 |
Align Alignment = SI.getAlign(); |
7847 |
Align Alignment = SI.getAlign(); |
| 7848 |
const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper); |
7848 |
const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper); |
| 7849 |
if (IsOffsetStore) { |
7849 |
if (IsOffsetStore) { |
| 7850 |
Addr = Builder.CreateGEP( |
7850 |
Addr = Builder.CreateGEP( |
| 7851 |
SplitStoreType, Addr, |
7851 |
SplitStoreType, Addr, |
| 7852 |
ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); |
7852 |
ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); |
| 7853 |
|
7853 |
|
| 7854 |
// When splitting the store in half, naturally one half will retain the |
7854 |
// When splitting the store in half, naturally one half will retain the |
| 7855 |
// alignment of the original wider store, regardless of whether it was |
7855 |
// alignment of the original wider store, regardless of whether it was |
| 7856 |
// over-aligned or not, while the other will require adjustment. |
7856 |
// over-aligned or not, while the other will require adjustment. |
| 7857 |
Alignment = commonAlignment(Alignment, HalfValBitSize / 8); |
7857 |
Alignment = commonAlignment(Alignment, HalfValBitSize / 8); |
| 7858 |
} |
7858 |
} |
| 7859 |
Builder.CreateAlignedStore(V, Addr, Alignment); |
7859 |
Builder.CreateAlignedStore(V, Addr, Alignment); |
| 7860 |
}; |
7860 |
}; |
| 7861 |
|
7861 |
|
| 7862 |
CreateSplitStore(LValue, false); |
7862 |
CreateSplitStore(LValue, false); |
| 7863 |
CreateSplitStore(HValue, true); |
7863 |
CreateSplitStore(HValue, true); |
| 7864 |
|
7864 |
|
| 7865 |
// Delete the old store. |
7865 |
// Delete the old store. |
| 7866 |
SI.eraseFromParent(); |
7866 |
SI.eraseFromParent(); |
| 7867 |
return true; |
7867 |
return true; |
| 7868 |
} |
7868 |
} |
| 7869 |
|
7869 |
|
| 7870 |
// Return true if the GEP has two operands, the first operand is of a sequential |
7870 |
// Return true if the GEP has two operands, the first operand is of a sequential |
| 7871 |
// type, and the second operand is a constant. |
7871 |
// type, and the second operand is a constant. |
| 7872 |
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { |
7872 |
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { |
| 7873 |
gep_type_iterator I = gep_type_begin(*GEP); |
7873 |
gep_type_iterator I = gep_type_begin(*GEP); |
| 7874 |
return GEP->getNumOperands() == 2 && I.isSequential() && |
7874 |
return GEP->getNumOperands() == 2 && I.isSequential() && |
| 7875 |
isa(GEP->getOperand(1)); |
7875 |
isa(GEP->getOperand(1)); |
| 7876 |
} |
7876 |
} |
| 7877 |
|
7877 |
|
| 7878 |
// Try unmerging GEPs to reduce liveness interference (register pressure) across |
7878 |
// Try unmerging GEPs to reduce liveness interference (register pressure) across |
| 7879 |
// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, |
7879 |
// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, |
| 7880 |
// reducing liveness interference across those edges benefits global register |
7880 |
// reducing liveness interference across those edges benefits global register |
| 7881 |
// allocation. Currently handles only certain cases. |
7881 |
// allocation. Currently handles only certain cases. |
| 7882 |
// |
7882 |
// |
| 7883 |
// For example, unmerge %GEPI and %UGEPI as below. |
7883 |
// For example, unmerge %GEPI and %UGEPI as below. |
| 7884 |
// |
7884 |
// |
| 7885 |
// ---------- BEFORE ---------- |
7885 |
// ---------- BEFORE ---------- |
| 7886 |
// SrcBlock: |
7886 |
// SrcBlock: |
| 7887 |
// ... |
7887 |
// ... |
| 7888 |
// %GEPIOp = ... |
7888 |
// %GEPIOp = ... |
| 7889 |
// ... |
7889 |
// ... |
| 7890 |
// %GEPI = gep %GEPIOp, Idx |
7890 |
// %GEPI = gep %GEPIOp, Idx |
| 7891 |
// ... |
7891 |
// ... |
| 7892 |
// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] |
7892 |
// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] |
| 7893 |
// (* %GEPI is alive on the indirectbr edges due to other uses ahead) |
7893 |
// (* %GEPI is alive on the indirectbr edges due to other uses ahead) |
| 7894 |
// (* %GEPIOp is alive on the indirectbr edges only because of it's used by |
7894 |
// (* %GEPIOp is alive on the indirectbr edges only because of it's used by |
| 7895 |
// %UGEPI) |
7895 |
// %UGEPI) |
| 7896 |
// |
7896 |
// |
| 7897 |
// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged) |
7897 |
// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged) |
| 7898 |
// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged) |
7898 |
// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged) |
| 7899 |
// ... |
7899 |
// ... |
| 7900 |
// |
7900 |
// |
| 7901 |
// DstBi: |
7901 |
// DstBi: |
| 7902 |
// ... |
7902 |
// ... |
| 7903 |
// %UGEPI = gep %GEPIOp, UIdx |
7903 |
// %UGEPI = gep %GEPIOp, UIdx |
| 7904 |
// ... |
7904 |
// ... |
| 7905 |
// --------------------------- |
7905 |
// --------------------------- |
| 7906 |
// |
7906 |
// |
| 7907 |
// ---------- AFTER ---------- |
7907 |
// ---------- AFTER ---------- |
| 7908 |
// SrcBlock: |
7908 |
// SrcBlock: |
| 7909 |
// ... (same as above) |
7909 |
// ... (same as above) |
| 7910 |
// (* %GEPI is still alive on the indirectbr edges) |
7910 |
// (* %GEPI is still alive on the indirectbr edges) |
| 7911 |
// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the |
7911 |
// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the |
| 7912 |
// unmerging) |
7912 |
// unmerging) |
| 7913 |
// ... |
7913 |
// ... |
| 7914 |
// |
7914 |
// |
| 7915 |
// DstBi: |
7915 |
// DstBi: |
| 7916 |
// ... |
7916 |
// ... |
| 7917 |
// %UGEPI = gep %GEPI, (UIdx-Idx) |
7917 |
// %UGEPI = gep %GEPI, (UIdx-Idx) |
| 7918 |
// ... |
7918 |
// ... |
| 7919 |
// --------------------------- |
7919 |
// --------------------------- |
| 7920 |
// |
7920 |
// |
| 7921 |
// The register pressure on the IndirectBr edges is reduced because %GEPIOp is |
7921 |
// The register pressure on the IndirectBr edges is reduced because %GEPIOp is |
| 7922 |
// no longer alive on them. |
7922 |
// no longer alive on them. |
| 7923 |
// |
7923 |
// |
| 7924 |
// We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging |
7924 |
// We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging |
| 7925 |
// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as |
7925 |
// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as |
| 7926 |
// not to disable further simplications and optimizations as a result of GEP |
7926 |
// not to disable further simplications and optimizations as a result of GEP |
| 7927 |
// merging. |
7927 |
// merging. |
| 7928 |
// |
7928 |
// |
| 7929 |
// Note this unmerging may increase the length of the data flow critical path |
7929 |
// Note this unmerging may increase the length of the data flow critical path |
| 7930 |
// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff |
7930 |
// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff |
| 7931 |
// between the register pressure and the length of data-flow critical |
7931 |
// between the register pressure and the length of data-flow critical |
| 7932 |
// path. Restricting this to the uncommon IndirectBr case would minimize the |
7932 |
// path. Restricting this to the uncommon IndirectBr case would minimize the |
| 7933 |
// impact of potentially longer critical path, if any, and the impact on compile |
7933 |
// impact of potentially longer critical path, if any, and the impact on compile |
| 7934 |
// time. |
7934 |
// time. |
| 7935 |
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, |
7935 |
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, |
| 7936 |
const TargetTransformInfo *TTI) { |
7936 |
const TargetTransformInfo *TTI) { |
| 7937 |
BasicBlock *SrcBlock = GEPI->getParent(); |
7937 |
BasicBlock *SrcBlock = GEPI->getParent(); |
| 7938 |
// Check that SrcBlock ends with an IndirectBr. If not, give up. The common |
7938 |
// Check that SrcBlock ends with an IndirectBr. If not, give up. The common |
| 7939 |
// (non-IndirectBr) cases exit early here. |
7939 |
// (non-IndirectBr) cases exit early here. |
| 7940 |
if (!isa(SrcBlock->getTerminator())) |
7940 |
if (!isa(SrcBlock->getTerminator())) |
| 7941 |
return false; |
7941 |
return false; |
| 7942 |
// Check that GEPI is a simple gep with a single constant index. |
7942 |
// Check that GEPI is a simple gep with a single constant index. |
| 7943 |
if (!GEPSequentialConstIndexed(GEPI)) |
7943 |
if (!GEPSequentialConstIndexed(GEPI)) |
| 7944 |
return false; |
7944 |
return false; |
| 7945 |
ConstantInt *GEPIIdx = cast(GEPI->getOperand(1)); |
7945 |
ConstantInt *GEPIIdx = cast(GEPI->getOperand(1)); |
| 7946 |
// Check that GEPI is a cheap one. |
7946 |
// Check that GEPI is a cheap one. |
| 7947 |
if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(), |
7947 |
if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(), |
| 7948 |
TargetTransformInfo::TCK_SizeAndLatency) > |
7948 |
TargetTransformInfo::TCK_SizeAndLatency) > |
| 7949 |
TargetTransformInfo::TCC_Basic) |
7949 |
TargetTransformInfo::TCC_Basic) |
| 7950 |
return false; |
7950 |
return false; |
| 7951 |
Value *GEPIOp = GEPI->getOperand(0); |
7951 |
Value *GEPIOp = GEPI->getOperand(0); |
| 7952 |
// Check that GEPIOp is an instruction that's also defined in SrcBlock. |
7952 |
// Check that GEPIOp is an instruction that's also defined in SrcBlock. |
| 7953 |
if (!isa(GEPIOp)) |
7953 |
if (!isa(GEPIOp)) |
| 7954 |
return false; |
7954 |
return false; |
| 7955 |
auto *GEPIOpI = cast(GEPIOp); |
7955 |
auto *GEPIOpI = cast(GEPIOp); |
| 7956 |
if (GEPIOpI->getParent() != SrcBlock) |
7956 |
if (GEPIOpI->getParent() != SrcBlock) |
| 7957 |
return false; |
7957 |
return false; |
| 7958 |
// Check that GEP is used outside the block, meaning it's alive on the |
7958 |
// Check that GEP is used outside the block, meaning it's alive on the |
| 7959 |
// IndirectBr edge(s). |
7959 |
// IndirectBr edge(s). |
| 7960 |
if (llvm::none_of(GEPI->users(), [&](User *Usr) { |
7960 |
if (llvm::none_of(GEPI->users(), [&](User *Usr) { |
| 7961 |
if (auto *I = dyn_cast(Usr)) { |
7961 |
if (auto *I = dyn_cast(Usr)) { |
| 7962 |
if (I->getParent() != SrcBlock) { |
7962 |
if (I->getParent() != SrcBlock) { |
| 7963 |
return true; |
7963 |
return true; |
| 7964 |
} |
7964 |
} |
| 7965 |
} |
7965 |
} |
| 7966 |
return false; |
7966 |
return false; |
| 7967 |
})) |
7967 |
})) |
| 7968 |
return false; |
7968 |
return false; |
| 7969 |
// The second elements of the GEP chains to be unmerged. |
7969 |
// The second elements of the GEP chains to be unmerged. |
| 7970 |
std::vector UGEPIs; |
7970 |
std::vector UGEPIs; |
| 7971 |
// Check each user of GEPIOp to check if unmerging would make GEPIOp not alive |
7971 |
// Check each user of GEPIOp to check if unmerging would make GEPIOp not alive |
| 7972 |
// on IndirectBr edges. |
7972 |
// on IndirectBr edges. |
| 7973 |
for (User *Usr : GEPIOp->users()) { |
7973 |
for (User *Usr : GEPIOp->users()) { |
| 7974 |
if (Usr == GEPI) |
7974 |
if (Usr == GEPI) |
| 7975 |
continue; |
7975 |
continue; |
| 7976 |
// Check if Usr is an Instruction. If not, give up. |
7976 |
// Check if Usr is an Instruction. If not, give up. |
| 7977 |
if (!isa(Usr)) |
7977 |
if (!isa(Usr)) |
| 7978 |
return false; |
7978 |
return false; |
| 7979 |
auto *UI = cast(Usr); |
7979 |
auto *UI = cast(Usr); |
| 7980 |
// Check if Usr in the same block as GEPIOp, which is fine, skip. |
7980 |
// Check if Usr in the same block as GEPIOp, which is fine, skip. |
| 7981 |
if (UI->getParent() == SrcBlock) |
7981 |
if (UI->getParent() == SrcBlock) |
| 7982 |
continue; |
7982 |
continue; |
| 7983 |
// Check if Usr is a GEP. If not, give up. |
7983 |
// Check if Usr is a GEP. If not, give up. |
| 7984 |
if (!isa(Usr)) |
7984 |
if (!isa(Usr)) |
| 7985 |
return false; |
7985 |
return false; |
| 7986 |
auto *UGEPI = cast(Usr); |
7986 |
auto *UGEPI = cast(Usr); |
| 7987 |
// Check if UGEPI is a simple gep with a single constant index and GEPIOp is |
7987 |
// Check if UGEPI is a simple gep with a single constant index and GEPIOp is |
| 7988 |
// the pointer operand to it. If so, record it in the vector. If not, give |
7988 |
// the pointer operand to it. If so, record it in the vector. If not, give |
| 7989 |
// up. |
7989 |
// up. |
| 7990 |
if (!GEPSequentialConstIndexed(UGEPI)) |
7990 |
if (!GEPSequentialConstIndexed(UGEPI)) |
| 7991 |
return false; |
7991 |
return false; |
| 7992 |
if (UGEPI->getOperand(0) != GEPIOp) |
7992 |
if (UGEPI->getOperand(0) != GEPIOp) |
| 7993 |
return false; |
7993 |
return false; |
| 7994 |
if (GEPIIdx->getType() != |
7994 |
if (GEPIIdx->getType() != |
| 7995 |
cast(UGEPI->getOperand(1))->getType()) |
7995 |
cast(UGEPI->getOperand(1))->getType()) |
| 7996 |
return false; |
7996 |
return false; |
| 7997 |
ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1)); |
7997 |
ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1)); |
| 7998 |
if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(), |
7998 |
if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(), |
| 7999 |
TargetTransformInfo::TCK_SizeAndLatency) > |
7999 |
TargetTransformInfo::TCK_SizeAndLatency) > |
| 8000 |
TargetTransformInfo::TCC_Basic) |
8000 |
TargetTransformInfo::TCC_Basic) |
| 8001 |
return false; |
8001 |
return false; |
| 8002 |
UGEPIs.push_back(UGEPI); |
8002 |
UGEPIs.push_back(UGEPI); |
| 8003 |
} |
8003 |
} |
| 8004 |
if (UGEPIs.size() == 0) |
8004 |
if (UGEPIs.size() == 0) |
| 8005 |
return false; |
8005 |
return false; |
| 8006 |
// Check the materializing cost of (Uidx-Idx). |
8006 |
// Check the materializing cost of (Uidx-Idx). |
| 8007 |
for (GetElementPtrInst *UGEPI : UGEPIs) { |
8007 |
for (GetElementPtrInst *UGEPI : UGEPIs) { |
| 8008 |
ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1)); |
8008 |
ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1)); |
| 8009 |
APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); |
8009 |
APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); |
| 8010 |
InstructionCost ImmCost = TTI->getIntImmCost( |
8010 |
InstructionCost ImmCost = TTI->getIntImmCost( |
| 8011 |
NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency); |
8011 |
NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency); |
| 8012 |
if (ImmCost > TargetTransformInfo::TCC_Basic) |
8012 |
if (ImmCost > TargetTransformInfo::TCC_Basic) |
| 8013 |
return false; |
8013 |
return false; |
| 8014 |
} |
8014 |
} |
| 8015 |
// Now unmerge between GEPI and UGEPIs. |
8015 |
// Now unmerge between GEPI and UGEPIs. |
| 8016 |
for (GetElementPtrInst *UGEPI : UGEPIs) { |
8016 |
for (GetElementPtrInst *UGEPI : UGEPIs) { |
| 8017 |
UGEPI->setOperand(0, GEPI); |
8017 |
UGEPI->setOperand(0, GEPI); |
| 8018 |
ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1)); |
8018 |
ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1)); |
| 8019 |
Constant *NewUGEPIIdx = ConstantInt::get( |
8019 |
Constant *NewUGEPIIdx = ConstantInt::get( |
| 8020 |
GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue()); |
8020 |
GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue()); |
| 8021 |
UGEPI->setOperand(1, NewUGEPIIdx); |
8021 |
UGEPI->setOperand(1, NewUGEPIIdx); |
| 8022 |
// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not |
8022 |
// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not |
| 8023 |
// inbounds to avoid UB. |
8023 |
// inbounds to avoid UB. |
| 8024 |
if (!GEPI->isInBounds()) { |
8024 |
if (!GEPI->isInBounds()) { |
| 8025 |
UGEPI->setIsInBounds(false); |
8025 |
UGEPI->setIsInBounds(false); |
| 8026 |
} |
8026 |
} |
| 8027 |
} |
8027 |
} |
| 8028 |
// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not |
8028 |
// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not |
| 8029 |
// alive on IndirectBr edges). |
8029 |
// alive on IndirectBr edges). |
| 8030 |
assert(llvm::none_of(GEPIOp->users(), |
8030 |
assert(llvm::none_of(GEPIOp->users(), |
| 8031 |
[&](User *Usr) { |
8031 |
[&](User *Usr) { |
| 8032 |
return cast(Usr)->getParent() != SrcBlock; |
8032 |
return cast(Usr)->getParent() != SrcBlock; |
| 8033 |
}) && |
8033 |
}) && |
| 8034 |
"GEPIOp is used outside SrcBlock"); |
8034 |
"GEPIOp is used outside SrcBlock"); |
| 8035 |
return true; |
8035 |
return true; |
| 8036 |
} |
8036 |
} |
| 8037 |
|
8037 |
|
| 8038 |
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, |
8038 |
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, |
| 8039 |
SmallSet &FreshBBs, |
8039 |
SmallSet &FreshBBs, |
| 8040 |
bool IsHugeFunc) { |
8040 |
bool IsHugeFunc) { |
| 8041 |
// Try and convert |
8041 |
// Try and convert |
| 8042 |
// %c = icmp ult %x, 8 |
8042 |
// %c = icmp ult %x, 8 |
| 8043 |
// br %c, bla, blb |
8043 |
// br %c, bla, blb |
| 8044 |
// %tc = lshr %x, 3 |
8044 |
// %tc = lshr %x, 3 |
| 8045 |
// to |
8045 |
// to |
| 8046 |
// %tc = lshr %x, 3 |
8046 |
// %tc = lshr %x, 3 |
| 8047 |
// %c = icmp eq %tc, 0 |
8047 |
// %c = icmp eq %tc, 0 |
| 8048 |
// br %c, bla, blb |
8048 |
// br %c, bla, blb |
| 8049 |
// Creating the cmp to zero can be better for the backend, especially if the |
8049 |
// Creating the cmp to zero can be better for the backend, especially if the |
| 8050 |
// lshr produces flags that can be used automatically. |
8050 |
// lshr produces flags that can be used automatically. |
| 8051 |
if (!TLI.preferZeroCompareBranch() || !Branch->isConditional()) |
8051 |
if (!TLI.preferZeroCompareBranch() || !Branch->isConditional()) |
| 8052 |
return false; |
8052 |
return false; |
| 8053 |
|
8053 |
|
| 8054 |
ICmpInst *Cmp = dyn_cast(Branch->getCondition()); |
8054 |
ICmpInst *Cmp = dyn_cast(Branch->getCondition()); |
| 8055 |
if (!Cmp || !isa(Cmp->getOperand(1)) || !Cmp->hasOneUse()) |
8055 |
if (!Cmp || !isa(Cmp->getOperand(1)) || !Cmp->hasOneUse()) |
| 8056 |
return false; |
8056 |
return false; |
| 8057 |
|
8057 |
|
| 8058 |
Value *X = Cmp->getOperand(0); |
8058 |
Value *X = Cmp->getOperand(0); |
| 8059 |
APInt CmpC = cast(Cmp->getOperand(1))->getValue(); |
8059 |
APInt CmpC = cast(Cmp->getOperand(1))->getValue(); |
| 8060 |
|
8060 |
|
| 8061 |
for (auto *U : X->users()) { |
8061 |
for (auto *U : X->users()) { |
| 8062 |
Instruction *UI = dyn_cast(U); |
8062 |
Instruction *UI = dyn_cast(U); |
| 8063 |
// A quick dominance check |
8063 |
// A quick dominance check |
| 8064 |
if (!UI || |
8064 |
if (!UI || |
| 8065 |
(UI->getParent() != Branch->getParent() && |
8065 |
(UI->getParent() != Branch->getParent() && |
| 8066 |
UI->getParent() != Branch->getSuccessor(0) && |
8066 |
UI->getParent() != Branch->getSuccessor(0) && |
| 8067 |
UI->getParent() != Branch->getSuccessor(1)) || |
8067 |
UI->getParent() != Branch->getSuccessor(1)) || |
| 8068 |
(UI->getParent() != Branch->getParent() && |
8068 |
(UI->getParent() != Branch->getParent() && |
| 8069 |
!UI->getParent()->getSinglePredecessor())) |
8069 |
!UI->getParent()->getSinglePredecessor())) |
| 8070 |
continue; |
8070 |
continue; |
| 8071 |
|
8071 |
|
| 8072 |
if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT && |
8072 |
if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT && |
| 8073 |
match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) { |
8073 |
match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) { |
| 8074 |
IRBuilder<> Builder(Branch); |
8074 |
IRBuilder<> Builder(Branch); |
| 8075 |
if (UI->getParent() != Branch->getParent()) |
8075 |
if (UI->getParent() != Branch->getParent()) |
| 8076 |
UI->moveBefore(Branch); |
8076 |
UI->moveBefore(Branch); |
| 8077 |
Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI, |
8077 |
Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI, |
| 8078 |
ConstantInt::get(UI->getType(), 0)); |
8078 |
ConstantInt::get(UI->getType(), 0)); |
| 8079 |
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); |
8079 |
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); |
| 8080 |
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); |
8080 |
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); |
| 8081 |
replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); |
8081 |
replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); |
| 8082 |
return true; |
8082 |
return true; |
| 8083 |
} |
8083 |
} |
| 8084 |
if (Cmp->isEquality() && |
8084 |
if (Cmp->isEquality() && |
| 8085 |
(match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) || |
8085 |
(match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) || |
| 8086 |
match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) { |
8086 |
match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) { |
| 8087 |
IRBuilder<> Builder(Branch); |
8087 |
IRBuilder<> Builder(Branch); |
| 8088 |
if (UI->getParent() != Branch->getParent()) |
8088 |
if (UI->getParent() != Branch->getParent()) |
| 8089 |
UI->moveBefore(Branch); |
8089 |
UI->moveBefore(Branch); |
| 8090 |
Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI, |
8090 |
Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI, |
| 8091 |
ConstantInt::get(UI->getType(), 0)); |
8091 |
ConstantInt::get(UI->getType(), 0)); |
| 8092 |
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); |
8092 |
LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n"); |
| 8093 |
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); |
8093 |
LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n"); |
| 8094 |
replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); |
8094 |
replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); |
| 8095 |
return true; |
8095 |
return true; |
| 8096 |
} |
8096 |
} |
| 8097 |
} |
8097 |
} |
| 8098 |
return false; |
8098 |
return false; |
| 8099 |
} |
8099 |
} |
| 8100 |
|
8100 |
|
| 8101 |
bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) { |
8101 |
bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) { |
| 8102 |
// Bail out if we inserted the instruction to prevent optimizations from |
8102 |
// Bail out if we inserted the instruction to prevent optimizations from |
| 8103 |
// stepping on each other's toes. |
8103 |
// stepping on each other's toes. |
| 8104 |
if (InsertedInsts.count(I)) |
8104 |
if (InsertedInsts.count(I)) |
| 8105 |
return false; |
8105 |
return false; |
| 8106 |
|
8106 |
|
| 8107 |
// TODO: Move into the switch on opcode below here. |
8107 |
// TODO: Move into the switch on opcode below here. |
| 8108 |
if (PHINode *P = dyn_cast(I)) { |
8108 |
if (PHINode *P = dyn_cast(I)) { |
| 8109 |
// It is possible for very late stage optimizations (such as SimplifyCFG) |
8109 |
// It is possible for very late stage optimizations (such as SimplifyCFG) |
| 8110 |
// to introduce PHI nodes too late to be cleaned up. If we detect such a |
8110 |
// to introduce PHI nodes too late to be cleaned up. If we detect such a |
| 8111 |
// trivial PHI, go ahead and zap it here. |
8111 |
// trivial PHI, go ahead and zap it here. |
| 8112 |
if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) { |
8112 |
if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) { |
| 8113 |
LargeOffsetGEPMap.erase(P); |
8113 |
LargeOffsetGEPMap.erase(P); |
| 8114 |
replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc); |
8114 |
replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc); |
| 8115 |
P->eraseFromParent(); |
8115 |
P->eraseFromParent(); |
| 8116 |
++NumPHIsElim; |
8116 |
++NumPHIsElim; |
| 8117 |
return true; |
8117 |
return true; |
| 8118 |
} |
8118 |
} |
| 8119 |
return false; |
8119 |
return false; |
| 8120 |
} |
8120 |
} |
| 8121 |
|
8121 |
|
| 8122 |
if (CastInst *CI = dyn_cast(I)) { |
8122 |
if (CastInst *CI = dyn_cast(I)) { |
| 8123 |
// If the source of the cast is a constant, then this should have |
8123 |
// If the source of the cast is a constant, then this should have |
| 8124 |
// already been constant folded. The only reason NOT to constant fold |
8124 |
// already been constant folded. The only reason NOT to constant fold |
| 8125 |
// it is if something (e.g. LSR) was careful to place the constant |
8125 |
// it is if something (e.g. LSR) was careful to place the constant |
| 8126 |
// evaluation in a block other than then one that uses it (e.g. to hoist |
8126 |
// evaluation in a block other than then one that uses it (e.g. to hoist |
| 8127 |
// the address of globals out of a loop). If this is the case, we don't |
8127 |
// the address of globals out of a loop). If this is the case, we don't |
| 8128 |
// want to forward-subst the cast. |
8128 |
// want to forward-subst the cast. |
| 8129 |
if (isa(CI->getOperand(0))) |
8129 |
if (isa(CI->getOperand(0))) |
| 8130 |
return false; |
8130 |
return false; |
| 8131 |
|
8131 |
|
| 8132 |
if (OptimizeNoopCopyExpression(CI, *TLI, *DL)) |
8132 |
if (OptimizeNoopCopyExpression(CI, *TLI, *DL)) |
| 8133 |
return true; |
8133 |
return true; |
| 8134 |
|
8134 |
|
| 8135 |
if ((isa(I) || isa(I) || isa(I)) && |
8135 |
if ((isa(I) || isa(I) || isa(I)) && |
| 8136 |
TLI->optimizeExtendOrTruncateConversion( |
8136 |
TLI->optimizeExtendOrTruncateConversion( |
| 8137 |
I, LI->getLoopFor(I->getParent()), *TTI)) |
8137 |
I, LI->getLoopFor(I->getParent()), *TTI)) |
| 8138 |
return true; |
8138 |
return true; |
| 8139 |
|
8139 |
|
| 8140 |
if (isa(I) || isa(I)) { |
8140 |
if (isa(I) || isa(I)) { |
| 8141 |
/// Sink a zext or sext into its user blocks if the target type doesn't |
8141 |
/// Sink a zext or sext into its user blocks if the target type doesn't |
| 8142 |
/// fit in one register |
8142 |
/// fit in one register |
| 8143 |
if (TLI->getTypeAction(CI->getContext(), |
8143 |
if (TLI->getTypeAction(CI->getContext(), |
| 8144 |
TLI->getValueType(*DL, CI->getType())) == |
8144 |
TLI->getValueType(*DL, CI->getType())) == |
| 8145 |
TargetLowering::TypeExpandInteger) { |
8145 |
TargetLowering::TypeExpandInteger) { |
| 8146 |
return SinkCast(CI); |
8146 |
return SinkCast(CI); |
| 8147 |
} else { |
8147 |
} else { |
| 8148 |
if (TLI->optimizeExtendOrTruncateConversion( |
8148 |
if (TLI->optimizeExtendOrTruncateConversion( |
| 8149 |
I, LI->getLoopFor(I->getParent()), *TTI)) |
8149 |
I, LI->getLoopFor(I->getParent()), *TTI)) |
| 8150 |
return true; |
8150 |
return true; |
| 8151 |
|
8151 |
|
| 8152 |
bool MadeChange = optimizeExt(I); |
8152 |
bool MadeChange = optimizeExt(I); |
| 8153 |
return MadeChange | optimizeExtUses(I); |
8153 |
return MadeChange | optimizeExtUses(I); |
| 8154 |
} |
8154 |
} |
| 8155 |
} |
8155 |
} |
| 8156 |
return false; |
8156 |
return false; |
| 8157 |
} |
8157 |
} |
| 8158 |
|
8158 |
|
| 8159 |
if (auto *Cmp = dyn_cast(I)) |
8159 |
if (auto *Cmp = dyn_cast(I)) |
| 8160 |
if (optimizeCmp(Cmp, ModifiedDT)) |
8160 |
if (optimizeCmp(Cmp, ModifiedDT)) |
| 8161 |
return true; |
8161 |
return true; |
| 8162 |
|
8162 |
|
| 8163 |
if (LoadInst *LI = dyn_cast(I)) { |
8163 |
if (LoadInst *LI = dyn_cast(I)) { |
| 8164 |
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); |
8164 |
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); |
| 8165 |
bool Modified = optimizeLoadExt(LI); |
8165 |
bool Modified = optimizeLoadExt(LI); |
| 8166 |
unsigned AS = LI->getPointerAddressSpace(); |
8166 |
unsigned AS = LI->getPointerAddressSpace(); |
| 8167 |
Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); |
8167 |
Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); |
| 8168 |
return Modified; |
8168 |
return Modified; |
| 8169 |
} |
8169 |
} |
| 8170 |
|
8170 |
|
| 8171 |
if (StoreInst *SI = dyn_cast(I)) { |
8171 |
if (StoreInst *SI = dyn_cast(I)) { |
| 8172 |
if (splitMergedValStore(*SI, *DL, *TLI)) |
8172 |
if (splitMergedValStore(*SI, *DL, *TLI)) |
| 8173 |
return true; |
8173 |
return true; |
| 8174 |
SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); |
8174 |
SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); |
| 8175 |
unsigned AS = SI->getPointerAddressSpace(); |
8175 |
unsigned AS = SI->getPointerAddressSpace(); |
| 8176 |
return optimizeMemoryInst(I, SI->getOperand(1), |
8176 |
return optimizeMemoryInst(I, SI->getOperand(1), |
| 8177 |
SI->getOperand(0)->getType(), AS); |
8177 |
SI->getOperand(0)->getType(), AS); |
| 8178 |
} |
8178 |
} |
| 8179 |
|
8179 |
|
| 8180 |
if (AtomicRMWInst *RMW = dyn_cast(I)) { |
8180 |
if (AtomicRMWInst *RMW = dyn_cast(I)) { |
| 8181 |
unsigned AS = RMW->getPointerAddressSpace(); |
8181 |
unsigned AS = RMW->getPointerAddressSpace(); |
| 8182 |
return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS); |
8182 |
return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS); |
| 8183 |
} |
8183 |
} |
| 8184 |
|
8184 |
|
| 8185 |
if (AtomicCmpXchgInst *CmpX = dyn_cast(I)) { |
8185 |
if (AtomicCmpXchgInst *CmpX = dyn_cast(I)) { |
| 8186 |
unsigned AS = CmpX->getPointerAddressSpace(); |
8186 |
unsigned AS = CmpX->getPointerAddressSpace(); |
| 8187 |
return optimizeMemoryInst(I, CmpX->getPointerOperand(), |
8187 |
return optimizeMemoryInst(I, CmpX->getPointerOperand(), |
| 8188 |
CmpX->getCompareOperand()->getType(), AS); |
8188 |
CmpX->getCompareOperand()->getType(), AS); |
| 8189 |
} |
8189 |
} |
| 8190 |
|
8190 |
|
| 8191 |
BinaryOperator *BinOp = dyn_cast(I); |
8191 |
BinaryOperator *BinOp = dyn_cast(I); |
| 8192 |
|
8192 |
|
| 8193 |
if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking && |
8193 |
if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking && |
| 8194 |
sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts)) |
8194 |
sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts)) |
| 8195 |
return true; |
8195 |
return true; |
| 8196 |
|
8196 |
|
| 8197 |
// TODO: Move this into the switch on opcode - it handles shifts already. |
8197 |
// TODO: Move this into the switch on opcode - it handles shifts already. |
| 8198 |
if (BinOp && (BinOp->getOpcode() == Instruction::AShr || |
8198 |
if (BinOp && (BinOp->getOpcode() == Instruction::AShr || |
| 8199 |
BinOp->getOpcode() == Instruction::LShr)) { |
8199 |
BinOp->getOpcode() == Instruction::LShr)) { |
| 8200 |
ConstantInt *CI = dyn_cast(BinOp->getOperand(1)); |
8200 |
ConstantInt *CI = dyn_cast(BinOp->getOperand(1)); |
| 8201 |
if (CI && TLI->hasExtractBitsInsn()) |
8201 |
if (CI && TLI->hasExtractBitsInsn()) |
| 8202 |
if (OptimizeExtractBits(BinOp, CI, *TLI, *DL)) |
8202 |
if (OptimizeExtractBits(BinOp, CI, *TLI, *DL)) |
| 8203 |
return true; |
8203 |
return true; |
| 8204 |
} |
8204 |
} |
| 8205 |
|
8205 |
|
| 8206 |
if (GetElementPtrInst *GEPI = dyn_cast(I)) { |
8206 |
if (GetElementPtrInst *GEPI = dyn_cast(I)) { |
| 8207 |
if (GEPI->hasAllZeroIndices()) { |
8207 |
if (GEPI->hasAllZeroIndices()) { |
| 8208 |
/// The GEP operand must be a pointer, so must its result -> BitCast |
8208 |
/// The GEP operand must be a pointer, so must its result -> BitCast |
| 8209 |
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), |
8209 |
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), |
| 8210 |
GEPI->getName(), GEPI); |
8210 |
GEPI->getName(), GEPI); |
| 8211 |
NC->setDebugLoc(GEPI->getDebugLoc()); |
8211 |
NC->setDebugLoc(GEPI->getDebugLoc()); |
| 8212 |
replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc); |
8212 |
replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc); |
| 8213 |
RecursivelyDeleteTriviallyDeadInstructions( |
8213 |
RecursivelyDeleteTriviallyDeadInstructions( |
| 8214 |
GEPI, TLInfo, nullptr, |
8214 |
GEPI, TLInfo, nullptr, |
| 8215 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
8215 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
| 8216 |
++NumGEPsElim; |
8216 |
++NumGEPsElim; |
| 8217 |
optimizeInst(NC, ModifiedDT); |
8217 |
optimizeInst(NC, ModifiedDT); |
| 8218 |
return true; |
8218 |
return true; |
| 8219 |
} |
8219 |
} |
| 8220 |
if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) { |
8220 |
if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) { |
| 8221 |
return true; |
8221 |
return true; |
| 8222 |
} |
8222 |
} |
| 8223 |
return false; |
8223 |
return false; |
| 8224 |
} |
8224 |
} |
| 8225 |
|
8225 |
|
| 8226 |
if (FreezeInst *FI = dyn_cast(I)) { |
8226 |
if (FreezeInst *FI = dyn_cast(I)) { |
| 8227 |
// freeze(icmp a, const)) -> icmp (freeze a), const |
8227 |
// freeze(icmp a, const)) -> icmp (freeze a), const |
| 8228 |
// This helps generate efficient conditional jumps. |
8228 |
// This helps generate efficient conditional jumps. |
| 8229 |
Instruction *CmpI = nullptr; |
8229 |
Instruction *CmpI = nullptr; |
| 8230 |
if (ICmpInst *II = dyn_cast(FI->getOperand(0))) |
8230 |
if (ICmpInst *II = dyn_cast(FI->getOperand(0))) |
| 8231 |
CmpI = II; |
8231 |
CmpI = II; |
| 8232 |
else if (FCmpInst *F = dyn_cast(FI->getOperand(0))) |
8232 |
else if (FCmpInst *F = dyn_cast(FI->getOperand(0))) |
| 8233 |
CmpI = F->getFastMathFlags().none() ? F : nullptr; |
8233 |
CmpI = F->getFastMathFlags().none() ? F : nullptr; |
| 8234 |
|
8234 |
|
| 8235 |
if (CmpI && CmpI->hasOneUse()) { |
8235 |
if (CmpI && CmpI->hasOneUse()) { |
| 8236 |
auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1); |
8236 |
auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1); |
| 8237 |
bool Const0 = isa(Op0) || isa(Op0) || |
8237 |
bool Const0 = isa(Op0) || isa(Op0) || |
| 8238 |
isa(Op0); |
8238 |
isa(Op0); |
| 8239 |
bool Const1 = isa(Op1) || isa(Op1) || |
8239 |
bool Const1 = isa(Op1) || isa(Op1) || |
| 8240 |
isa(Op1); |
8240 |
isa(Op1); |
| 8241 |
if (Const0 || Const1) { |
8241 |
if (Const0 || Const1) { |
| 8242 |
if (!Const0 || !Const1) { |
8242 |
if (!Const0 || !Const1) { |
| 8243 |
auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI); |
8243 |
auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI); |
| 8244 |
F->takeName(FI); |
8244 |
F->takeName(FI); |
| 8245 |
CmpI->setOperand(Const0 ? 1 : 0, F); |
8245 |
CmpI->setOperand(Const0 ? 1 : 0, F); |
| 8246 |
} |
8246 |
} |
| 8247 |
replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc); |
8247 |
replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc); |
| 8248 |
FI->eraseFromParent(); |
8248 |
FI->eraseFromParent(); |
| 8249 |
return true; |
8249 |
return true; |
| 8250 |
} |
8250 |
} |
| 8251 |
} |
8251 |
} |
| 8252 |
return false; |
8252 |
return false; |
| 8253 |
} |
8253 |
} |
| 8254 |
|
8254 |
|
| 8255 |
if (tryToSinkFreeOperands(I)) |
8255 |
if (tryToSinkFreeOperands(I)) |
| 8256 |
return true; |
8256 |
return true; |
| 8257 |
|
8257 |
|
| 8258 |
switch (I->getOpcode()) { |
8258 |
switch (I->getOpcode()) { |
| 8259 |
case Instruction::Shl: |
8259 |
case Instruction::Shl: |
| 8260 |
case Instruction::LShr: |
8260 |
case Instruction::LShr: |
| 8261 |
case Instruction::AShr: |
8261 |
case Instruction::AShr: |
| 8262 |
return optimizeShiftInst(cast(I)); |
8262 |
return optimizeShiftInst(cast(I)); |
| 8263 |
case Instruction::Call: |
8263 |
case Instruction::Call: |
| 8264 |
return optimizeCallInst(cast(I), ModifiedDT); |
8264 |
return optimizeCallInst(cast(I), ModifiedDT); |
| 8265 |
case Instruction::Select: |
8265 |
case Instruction::Select: |
| 8266 |
return optimizeSelectInst(cast(I)); |
8266 |
return optimizeSelectInst(cast(I)); |
| 8267 |
case Instruction::ShuffleVector: |
8267 |
case Instruction::ShuffleVector: |
| 8268 |
return optimizeShuffleVectorInst(cast(I)); |
8268 |
return optimizeShuffleVectorInst(cast(I)); |
| 8269 |
case Instruction::Switch: |
8269 |
case Instruction::Switch: |
| 8270 |
return optimizeSwitchInst(cast(I)); |
8270 |
return optimizeSwitchInst(cast(I)); |
| 8271 |
case Instruction::ExtractElement: |
8271 |
case Instruction::ExtractElement: |
| 8272 |
return optimizeExtractElementInst(cast(I)); |
8272 |
return optimizeExtractElementInst(cast(I)); |
| 8273 |
case Instruction::Br: |
8273 |
case Instruction::Br: |
| 8274 |
return optimizeBranch(cast(I), *TLI, FreshBBs, IsHugeFunc); |
8274 |
return optimizeBranch(cast(I), *TLI, FreshBBs, IsHugeFunc); |
| 8275 |
} |
8275 |
} |
| 8276 |
|
8276 |
|
| 8277 |
return false; |
8277 |
return false; |
| 8278 |
} |
8278 |
} |
| 8279 |
|
8279 |
|
| 8280 |
/// Given an OR instruction, check to see if this is a bitreverse |
8280 |
/// Given an OR instruction, check to see if this is a bitreverse |
| 8281 |
/// idiom. If so, insert the new intrinsic and return true. |
8281 |
/// idiom. If so, insert the new intrinsic and return true. |
| 8282 |
bool CodeGenPrepare::makeBitReverse(Instruction &I) { |
8282 |
bool CodeGenPrepare::makeBitReverse(Instruction &I) { |
| 8283 |
if (!I.getType()->isIntegerTy() || |
8283 |
if (!I.getType()->isIntegerTy() || |
| 8284 |
!TLI->isOperationLegalOrCustom(ISD::BITREVERSE, |
8284 |
!TLI->isOperationLegalOrCustom(ISD::BITREVERSE, |
| 8285 |
TLI->getValueType(*DL, I.getType(), true))) |
8285 |
TLI->getValueType(*DL, I.getType(), true))) |
| 8286 |
return false; |
8286 |
return false; |
| 8287 |
|
8287 |
|
| 8288 |
SmallVector Insts; |
8288 |
SmallVector Insts; |
| 8289 |
if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) |
8289 |
if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) |
| 8290 |
return false; |
8290 |
return false; |
| 8291 |
Instruction *LastInst = Insts.back(); |
8291 |
Instruction *LastInst = Insts.back(); |
| 8292 |
replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc); |
8292 |
replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc); |
| 8293 |
RecursivelyDeleteTriviallyDeadInstructions( |
8293 |
RecursivelyDeleteTriviallyDeadInstructions( |
| 8294 |
&I, TLInfo, nullptr, |
8294 |
&I, TLInfo, nullptr, |
| 8295 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
8295 |
[&](Value *V) { removeAllAssertingVHReferences(V); }); |
| 8296 |
return true; |
8296 |
return true; |
| 8297 |
} |
8297 |
} |
| 8298 |
|
8298 |
|
| 8299 |
// In this pass we look for GEP and cast instructions that are used |
8299 |
// In this pass we look for GEP and cast instructions that are used |
| 8300 |
// across basic blocks and rewrite them to improve basic-block-at-a-time |
8300 |
// across basic blocks and rewrite them to improve basic-block-at-a-time |
| 8301 |
// selection. |
8301 |
// selection. |
| 8302 |
bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) { |
8302 |
bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) { |
| 8303 |
SunkAddrs.clear(); |
8303 |
SunkAddrs.clear(); |
| 8304 |
bool MadeChange = false; |
8304 |
bool MadeChange = false; |
| 8305 |
|
8305 |
|
| 8306 |
do { |
8306 |
do { |
| 8307 |
CurInstIterator = BB.begin(); |
8307 |
CurInstIterator = BB.begin(); |
| 8308 |
ModifiedDT = ModifyDT::NotModifyDT; |
8308 |
ModifiedDT = ModifyDT::NotModifyDT; |
| 8309 |
while (CurInstIterator != BB.end()) { |
8309 |
while (CurInstIterator != BB.end()) { |
| 8310 |
MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); |
8310 |
MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); |
| 8311 |
if (ModifiedDT != ModifyDT::NotModifyDT) { |
8311 |
if (ModifiedDT != ModifyDT::NotModifyDT) { |
| 8312 |
// For huge function we tend to quickly go though the inner optmization |
8312 |
// For huge function we tend to quickly go though the inner optmization |
| 8313 |
// opportunities in the BB. So we go back to the BB head to re-optimize |
8313 |
// opportunities in the BB. So we go back to the BB head to re-optimize |
| 8314 |
// each instruction instead of go back to the function head. |
8314 |
// each instruction instead of go back to the function head. |
| 8315 |
if (IsHugeFunc) { |
8315 |
if (IsHugeFunc) { |
| 8316 |
DT.reset(); |
8316 |
DT.reset(); |
| 8317 |
getDT(*BB.getParent()); |
8317 |
getDT(*BB.getParent()); |
| 8318 |
break; |
8318 |
break; |
| 8319 |
} else { |
8319 |
} else { |
| 8320 |
return true; |
8320 |
return true; |
| 8321 |
} |
8321 |
} |
| 8322 |
} |
8322 |
} |
| 8323 |
} |
8323 |
} |
| 8324 |
} while (ModifiedDT == ModifyDT::ModifyInstDT); |
8324 |
} while (ModifiedDT == ModifyDT::ModifyInstDT); |
| 8325 |
|
8325 |
|
| 8326 |
bool MadeBitReverse = true; |
8326 |
bool MadeBitReverse = true; |
| 8327 |
while (MadeBitReverse) { |
8327 |
while (MadeBitReverse) { |
| 8328 |
MadeBitReverse = false; |
8328 |
MadeBitReverse = false; |
| 8329 |
for (auto &I : reverse(BB)) { |
8329 |
for (auto &I : reverse(BB)) { |
| 8330 |
if (makeBitReverse(I)) { |
8330 |
if (makeBitReverse(I)) { |
| 8331 |
MadeBitReverse = MadeChange = true; |
8331 |
MadeBitReverse = MadeChange = true; |
| 8332 |
break; |
8332 |
break; |
| 8333 |
} |
8333 |
} |
| 8334 |
} |
8334 |
} |
| 8335 |
} |
8335 |
} |
| 8336 |
MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT); |
8336 |
MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT); |
| 8337 |
|
8337 |
|
| 8338 |
return MadeChange; |
8338 |
return MadeChange; |
| 8339 |
} |
8339 |
} |
| 8340 |
|
8340 |
|
| 8341 |
// Some CGP optimizations may move or alter what's computed in a block. Check |
8341 |
// Some CGP optimizations may move or alter what's computed in a block. Check |
| 8342 |
// whether a dbg.value intrinsic could be pointed at a more appropriate operand. |
8342 |
// whether a dbg.value intrinsic could be pointed at a more appropriate operand. |
| 8343 |
bool CodeGenPrepare::fixupDbgValue(Instruction *I) { |
8343 |
bool CodeGenPrepare::fixupDbgValue(Instruction *I) { |
| 8344 |
assert(isa(I)); |
8344 |
assert(isa(I)); |
| 8345 |
DbgValueInst &DVI = *cast(I); |
8345 |
DbgValueInst &DVI = *cast(I); |
| 8346 |
|
8346 |
|
| 8347 |
// Does this dbg.value refer to a sunk address calculation? |
8347 |
// Does this dbg.value refer to a sunk address calculation? |
| 8348 |
bool AnyChange = false; |
8348 |
bool AnyChange = false; |
| 8349 |
SmallDenseSet LocationOps(DVI.location_ops().begin(), |
8349 |
SmallDenseSet LocationOps(DVI.location_ops().begin(), |
| 8350 |
DVI.location_ops().end()); |
8350 |
DVI.location_ops().end()); |
| 8351 |
for (Value *Location : LocationOps) { |
8351 |
for (Value *Location : LocationOps) { |
| 8352 |
WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; |
8352 |
WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; |
| 8353 |
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; |
8353 |
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; |
| 8354 |
if (SunkAddr) { |
8354 |
if (SunkAddr) { |
| 8355 |
// Point dbg.value at locally computed address, which should give the best |
8355 |
// Point dbg.value at locally computed address, which should give the best |
| 8356 |
// opportunity to be accurately lowered. This update may change the type |
8356 |
// opportunity to be accurately lowered. This update may change the type |
| 8357 |
// of pointer being referred to; however this makes no difference to |
8357 |
// of pointer being referred to; however this makes no difference to |
| 8358 |
// debugging information, and we can't generate bitcasts that may affect |
8358 |
// debugging information, and we can't generate bitcasts that may affect |
| 8359 |
// codegen. |
8359 |
// codegen. |
| 8360 |
DVI.replaceVariableLocationOp(Location, SunkAddr); |
8360 |
DVI.replaceVariableLocationOp(Location, SunkAddr); |
| 8361 |
AnyChange = true; |
8361 |
AnyChange = true; |
| 8362 |
} |
8362 |
} |
| 8363 |
} |
8363 |
} |
| 8364 |
return AnyChange; |
8364 |
return AnyChange; |
| 8365 |
} |
8365 |
} |
| 8366 |
|
8366 |
|
| 8367 |
// A llvm.dbg.value may be using a value before its definition, due to |
8367 |
// A llvm.dbg.value may be using a value before its definition, due to |
| 8368 |
// optimizations in this pass and others. Scan for such dbg.values, and rescue |
8368 |
// optimizations in this pass and others. Scan for such dbg.values, and rescue |
| 8369 |
// them by moving the dbg.value to immediately after the value definition. |
8369 |
// them by moving the dbg.value to immediately after the value definition. |
| 8370 |
// FIXME: Ideally this should never be necessary, and this has the potential |
8370 |
// FIXME: Ideally this should never be necessary, and this has the potential |
| 8371 |
// to re-order dbg.value intrinsics. |
8371 |
// to re-order dbg.value intrinsics. |
| 8372 |
bool CodeGenPrepare::placeDbgValues(Function &F) { |
8372 |
bool CodeGenPrepare::placeDbgValues(Function &F) { |
| 8373 |
bool MadeChange = false; |
8373 |
bool MadeChange = false; |
| 8374 |
DominatorTree DT(F); |
8374 |
DominatorTree DT(F); |
| 8375 |
|
8375 |
|
| 8376 |
for (BasicBlock &BB : F) { |
8376 |
for (BasicBlock &BB : F) { |
| 8377 |
for (Instruction &Insn : llvm::make_early_inc_range(BB)) { |
8377 |
for (Instruction &Insn : llvm::make_early_inc_range(BB)) { |
| 8378 |
DbgValueInst *DVI = dyn_cast(&Insn); |
8378 |
DbgValueInst *DVI = dyn_cast(&Insn); |
| 8379 |
if (!DVI) |
8379 |
if (!DVI) |
| 8380 |
continue; |
8380 |
continue; |
| 8381 |
|
8381 |
|
| 8382 |
SmallVector VIs; |
8382 |
SmallVector VIs; |
| 8383 |
for (Value *V : DVI->getValues()) |
8383 |
for (Value *V : DVI->getValues()) |
| 8384 |
if (Instruction *VI = dyn_cast_or_null(V)) |
8384 |
if (Instruction *VI = dyn_cast_or_null(V)) |
| 8385 |
VIs.push_back(VI); |
8385 |
VIs.push_back(VI); |
| 8386 |
|
8386 |
|
| 8387 |
// This DVI may depend on multiple instructions, complicating any |
8387 |
// This DVI may depend on multiple instructions, complicating any |
| 8388 |
// potential sink. This block takes the defensive approach, opting to |
8388 |
// potential sink. This block takes the defensive approach, opting to |
| 8389 |
// "undef" the DVI if it has more than one instruction and any of them do |
8389 |
// "undef" the DVI if it has more than one instruction and any of them do |
| 8390 |
// not dominate DVI. |
8390 |
// not dominate DVI. |
| 8391 |
for (Instruction *VI : VIs) { |
8391 |
for (Instruction *VI : VIs) { |
| 8392 |
if (VI->isTerminator()) |
8392 |
if (VI->isTerminator()) |
| 8393 |
continue; |
8393 |
continue; |
| 8394 |
|
8394 |
|
| 8395 |
// If VI is a phi in a block with an EHPad terminator, we can't insert |
8395 |
// If VI is a phi in a block with an EHPad terminator, we can't insert |
| 8396 |
// after it. |
8396 |
// after it. |
| 8397 |
if (isa(VI) && VI->getParent()->getTerminator()->isEHPad()) |
8397 |
if (isa(VI) && VI->getParent()->getTerminator()->isEHPad()) |
| 8398 |
continue; |
8398 |
continue; |
| 8399 |
|
8399 |
|
| 8400 |
// If the defining instruction dominates the dbg.value, we do not need |
8400 |
// If the defining instruction dominates the dbg.value, we do not need |
| 8401 |
// to move the dbg.value. |
8401 |
// to move the dbg.value. |
| 8402 |
if (DT.dominates(VI, DVI)) |
8402 |
if (DT.dominates(VI, DVI)) |
| 8403 |
continue; |
8403 |
continue; |
| 8404 |
|
8404 |
|
| 8405 |
// If we depend on multiple instructions and any of them doesn't |
8405 |
// If we depend on multiple instructions and any of them doesn't |
| 8406 |
// dominate this DVI, we probably can't salvage it: moving it to |
8406 |
// dominate this DVI, we probably can't salvage it: moving it to |
| 8407 |
// after any of the instructions could cause us to lose the others. |
8407 |
// after any of the instructions could cause us to lose the others. |
| 8408 |
if (VIs.size() > 1) { |
8408 |
if (VIs.size() > 1) { |
| 8409 |
LLVM_DEBUG( |
8409 |
LLVM_DEBUG( |
| 8410 |
dbgs() |
8410 |
dbgs() |
| 8411 |
<< "Unable to find valid location for Debug Value, undefing:\n" |
8411 |
<< "Unable to find valid location for Debug Value, undefing:\n" |
| 8412 |
<< *DVI); |
8412 |
<< *DVI); |
| 8413 |
DVI->setKillLocation(); |
8413 |
DVI->setKillLocation(); |
| 8414 |
break; |
8414 |
break; |
| 8415 |
} |
8415 |
} |
| 8416 |
|
8416 |
|
| 8417 |
LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" |
8417 |
LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" |
| 8418 |
<< *DVI << ' ' << *VI); |
8418 |
<< *DVI << ' ' << *VI); |
| 8419 |
DVI->removeFromParent(); |
8419 |
DVI->removeFromParent(); |
| 8420 |
if (isa(VI)) |
8420 |
if (isa(VI)) |
| 8421 |
DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); |
8421 |
DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); |
| 8422 |
else |
8422 |
else |
| 8423 |
DVI->insertAfter(VI); |
8423 |
DVI->insertAfter(VI); |
| 8424 |
MadeChange = true; |
8424 |
MadeChange = true; |
| 8425 |
++NumDbgValueMoved; |
8425 |
++NumDbgValueMoved; |
| 8426 |
} |
8426 |
} |
| 8427 |
} |
8427 |
} |
| 8428 |
} |
8428 |
} |
| 8429 |
return MadeChange; |
8429 |
return MadeChange; |
| 8430 |
} |
8430 |
} |
| 8431 |
|
8431 |
|
| 8432 |
// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered |
8432 |
// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered |
| 8433 |
// probes can be chained dependencies of other regular DAG nodes and block DAG |
8433 |
// probes can be chained dependencies of other regular DAG nodes and block DAG |
| 8434 |
// combine optimizations. |
8434 |
// combine optimizations. |
| 8435 |
bool CodeGenPrepare::placePseudoProbes(Function &F) { |
8435 |
bool CodeGenPrepare::placePseudoProbes(Function &F) { |
| 8436 |
bool MadeChange = false; |
8436 |
bool MadeChange = false; |
| 8437 |
for (auto &Block : F) { |
8437 |
for (auto &Block : F) { |
| 8438 |
// Move the rest probes to the beginning of the block. |
8438 |
// Move the rest probes to the beginning of the block. |
| 8439 |
auto FirstInst = Block.getFirstInsertionPt(); |
8439 |
auto FirstInst = Block.getFirstInsertionPt(); |
| 8440 |
while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst()) |
8440 |
while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst()) |
| 8441 |
++FirstInst; |
8441 |
++FirstInst; |
| 8442 |
BasicBlock::iterator I(FirstInst); |
8442 |
BasicBlock::iterator I(FirstInst); |
| 8443 |
I++; |
8443 |
I++; |
| 8444 |
while (I != Block.end()) { |
8444 |
while (I != Block.end()) { |
| 8445 |
if (auto *II = dyn_cast(I++)) { |
8445 |
if (auto *II = dyn_cast(I++)) { |
| 8446 |
II->moveBefore(&*FirstInst); |
8446 |
II->moveBefore(&*FirstInst); |
| 8447 |
MadeChange = true; |
8447 |
MadeChange = true; |
| 8448 |
} |
8448 |
} |
| 8449 |
} |
8449 |
} |
| 8450 |
} |
8450 |
} |
| 8451 |
return MadeChange; |
8451 |
return MadeChange; |
| 8452 |
} |
8452 |
} |
| 8453 |
|
8453 |
|
| 8454 |
/// Scale down both weights to fit into uint32_t. |
8454 |
/// Scale down both weights to fit into uint32_t. |
| 8455 |
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { |
8455 |
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { |
| 8456 |
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; |
8456 |
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; |
| 8457 |
uint32_t Scale = (NewMax / std::numeric_limits::max()) + 1; |
8457 |
uint32_t Scale = (NewMax / std::numeric_limits::max()) + 1; |
| 8458 |
NewTrue = NewTrue / Scale; |
8458 |
NewTrue = NewTrue / Scale; |
| 8459 |
NewFalse = NewFalse / Scale; |
8459 |
NewFalse = NewFalse / Scale; |
| 8460 |
} |
8460 |
} |
| 8461 |
|
8461 |
|
| 8462 |
/// Some targets prefer to split a conditional branch like: |
8462 |
/// Some targets prefer to split a conditional branch like: |
| 8463 |
/// \code |
8463 |
/// \code |
| 8464 |
/// %0 = icmp ne i32 %a, 0 |
8464 |
/// %0 = icmp ne i32 %a, 0 |
| 8465 |
/// %1 = icmp ne i32 %b, 0 |
8465 |
/// %1 = icmp ne i32 %b, 0 |
| 8466 |
/// %or.cond = or i1 %0, %1 |
8466 |
/// %or.cond = or i1 %0, %1 |
| 8467 |
/// br i1 %or.cond, label %TrueBB, label %FalseBB |
8467 |
/// br i1 %or.cond, label %TrueBB, label %FalseBB |
| 8468 |
/// \endcode |
8468 |
/// \endcode |
| 8469 |
/// into multiple branch instructions like: |
8469 |
/// into multiple branch instructions like: |
| 8470 |
/// \code |
8470 |
/// \code |
| 8471 |
/// bb1: |
8471 |
/// bb1: |
| 8472 |
/// %0 = icmp ne i32 %a, 0 |
8472 |
/// %0 = icmp ne i32 %a, 0 |
| 8473 |
/// br i1 %0, label %TrueBB, label %bb2 |
8473 |
/// br i1 %0, label %TrueBB, label %bb2 |
| 8474 |
/// bb2: |
8474 |
/// bb2: |
| 8475 |
/// %1 = icmp ne i32 %b, 0 |
8475 |
/// %1 = icmp ne i32 %b, 0 |
| 8476 |
/// br i1 %1, label %TrueBB, label %FalseBB |
8476 |
/// br i1 %1, label %TrueBB, label %FalseBB |
| 8477 |
/// \endcode |
8477 |
/// \endcode |
| 8478 |
/// This usually allows instruction selection to do even further optimizations |
8478 |
/// This usually allows instruction selection to do even further optimizations |
| 8479 |
/// and combine the compare with the branch instruction. Currently this is |
8479 |
/// and combine the compare with the branch instruction. Currently this is |
| 8480 |
/// applied for targets which have "cheap" jump instructions. |
8480 |
/// applied for targets which have "cheap" jump instructions. |
| 8481 |
/// |
8481 |
/// |
| 8482 |
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG. |
8482 |
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG. |
| 8483 |
/// |
8483 |
/// |
| 8484 |
bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) { |
8484 |
bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) { |
| 8485 |
if (!TM->Options.EnableFastISel || TLI->isJumpExpensive()) |
8485 |
if (!TM->Options.EnableFastISel || TLI->isJumpExpensive()) |
| 8486 |
return false; |
8486 |
return false; |
| 8487 |
|
8487 |
|
| 8488 |
bool MadeChange = false; |
8488 |
bool MadeChange = false; |
| 8489 |
for (auto &BB : F) { |
8489 |
for (auto &BB : F) { |
| 8490 |
// Does this BB end with the following? |
8490 |
// Does this BB end with the following? |
| 8491 |
// %cond1 = icmp|fcmp|binary instruction ... |
8491 |
// %cond1 = icmp|fcmp|binary instruction ... |
| 8492 |
// %cond2 = icmp|fcmp|binary instruction ... |
8492 |
// %cond2 = icmp|fcmp|binary instruction ... |
| 8493 |
// %cond.or = or|and i1 %cond1, cond2 |
8493 |
// %cond.or = or|and i1 %cond1, cond2 |
| 8494 |
// br i1 %cond.or label %dest1, label %dest2" |
8494 |
// br i1 %cond.or label %dest1, label %dest2" |
| 8495 |
Instruction *LogicOp; |
8495 |
Instruction *LogicOp; |
| 8496 |
BasicBlock *TBB, *FBB; |
8496 |
BasicBlock *TBB, *FBB; |
| 8497 |
if (!match(BB.getTerminator(), |
8497 |
if (!match(BB.getTerminator(), |
| 8498 |
m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB))) |
8498 |
m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB))) |
| 8499 |
continue; |
8499 |
continue; |
| 8500 |
|
8500 |
|
| 8501 |
auto *Br1 = cast(BB.getTerminator()); |
8501 |
auto *Br1 = cast(BB.getTerminator()); |
| 8502 |
if (Br1->getMetadata(LLVMContext::MD_unpredictable)) |
8502 |
if (Br1->getMetadata(LLVMContext::MD_unpredictable)) |
| 8503 |
continue; |
8503 |
continue; |
| 8504 |
|
8504 |
|
| 8505 |
// The merging of mostly empty BB can cause a degenerate branch. |
8505 |
// The merging of mostly empty BB can cause a degenerate branch. |
| 8506 |
if (TBB == FBB) |
8506 |
if (TBB == FBB) |
| 8507 |
continue; |
8507 |
continue; |
| 8508 |
|
8508 |
|
| 8509 |
unsigned Opc; |
8509 |
unsigned Opc; |
| 8510 |
Value *Cond1, *Cond2; |
8510 |
Value *Cond1, *Cond2; |
| 8511 |
if (match(LogicOp, |
8511 |
if (match(LogicOp, |
| 8512 |
m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2))))) |
8512 |
m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2))))) |
| 8513 |
Opc = Instruction::And; |
8513 |
Opc = Instruction::And; |
| 8514 |
else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)), |
8514 |
else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)), |
| 8515 |
m_OneUse(m_Value(Cond2))))) |
8515 |
m_OneUse(m_Value(Cond2))))) |
| 8516 |
Opc = Instruction::Or; |
8516 |
Opc = Instruction::Or; |
| 8517 |
else |
8517 |
else |
| 8518 |
continue; |
8518 |
continue; |
| 8519 |
|
8519 |
|
| 8520 |
auto IsGoodCond = [](Value *Cond) { |
8520 |
auto IsGoodCond = [](Value *Cond) { |
| 8521 |
return match( |
8521 |
return match( |
| 8522 |
Cond, |
8522 |
Cond, |
| 8523 |
m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), |
8523 |
m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), |
| 8524 |
m_LogicalOr(m_Value(), m_Value())))); |
8524 |
m_LogicalOr(m_Value(), m_Value())))); |
| 8525 |
}; |
8525 |
}; |
| 8526 |
if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2)) |
8526 |
if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2)) |
| 8527 |
continue; |
8527 |
continue; |
| 8528 |
|
8528 |
|
| 8529 |
LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); |
8529 |
LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); |
| 8530 |
|
8530 |
|
| 8531 |
// Create a new BB. |
8531 |
// Create a new BB. |
| 8532 |
auto *TmpBB = |
8532 |
auto *TmpBB = |
| 8533 |
BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", |
8533 |
BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", |
| 8534 |
BB.getParent(), BB.getNextNode()); |
8534 |
BB.getParent(), BB.getNextNode()); |
| 8535 |
if (IsHugeFunc) |
8535 |
if (IsHugeFunc) |
| 8536 |
FreshBBs.insert(TmpBB); |
8536 |
FreshBBs.insert(TmpBB); |
| 8537 |
|
8537 |
|
| 8538 |
// Update original basic block by using the first condition directly by the |
8538 |
// Update original basic block by using the first condition directly by the |
| 8539 |
// branch instruction and removing the no longer needed and/or instruction. |
8539 |
// branch instruction and removing the no longer needed and/or instruction. |
| 8540 |
Br1->setCondition(Cond1); |
8540 |
Br1->setCondition(Cond1); |
| 8541 |
LogicOp->eraseFromParent(); |
8541 |
LogicOp->eraseFromParent(); |
| 8542 |
|
8542 |
|
| 8543 |
// Depending on the condition we have to either replace the true or the |
8543 |
// Depending on the condition we have to either replace the true or the |
| 8544 |
// false successor of the original branch instruction. |
8544 |
// false successor of the original branch instruction. |
| 8545 |
if (Opc == Instruction::And) |
8545 |
if (Opc == Instruction::And) |
| 8546 |
Br1->setSuccessor(0, TmpBB); |
8546 |
Br1->setSuccessor(0, TmpBB); |
| 8547 |
else |
8547 |
else |
| 8548 |
Br1->setSuccessor(1, TmpBB); |
8548 |
Br1->setSuccessor(1, TmpBB); |
| 8549 |
|
8549 |
|
| 8550 |
// Fill in the new basic block. |
8550 |
// Fill in the new basic block. |
| 8551 |
auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); |
8551 |
auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); |
| 8552 |
if (auto *I = dyn_cast(Cond2)) { |
8552 |
if (auto *I = dyn_cast(Cond2)) { |
| 8553 |
I->removeFromParent(); |
8553 |
I->removeFromParent(); |
| 8554 |
I->insertBefore(Br2); |
8554 |
I->insertBefore(Br2); |
| 8555 |
} |
8555 |
} |
| 8556 |
|
8556 |
|
| 8557 |
// Update PHI nodes in both successors. The original BB needs to be |
8557 |
// Update PHI nodes in both successors. The original BB needs to be |
| 8558 |
// replaced in one successor's PHI nodes, because the branch comes now from |
8558 |
// replaced in one successor's PHI nodes, because the branch comes now from |
| 8559 |
// the newly generated BB (NewBB). In the other successor we need to add one |
8559 |
// the newly generated BB (NewBB). In the other successor we need to add one |
| 8560 |
// incoming edge to the PHI nodes, because both branch instructions target |
8560 |
// incoming edge to the PHI nodes, because both branch instructions target |
| 8561 |
// now the same successor. Depending on the original branch condition |
8561 |
// now the same successor. Depending on the original branch condition |
| 8562 |
// (and/or) we have to swap the successors (TrueDest, FalseDest), so that |
8562 |
// (and/or) we have to swap the successors (TrueDest, FalseDest), so that |
| 8563 |
// we perform the correct update for the PHI nodes. |
8563 |
// we perform the correct update for the PHI nodes. |
| 8564 |
// This doesn't change the successor order of the just created branch |
8564 |
// This doesn't change the successor order of the just created branch |
| 8565 |
// instruction (or any other instruction). |
8565 |
// instruction (or any other instruction). |
| 8566 |
if (Opc == Instruction::Or) |
8566 |
if (Opc == Instruction::Or) |
| 8567 |
std::swap(TBB, FBB); |
8567 |
std::swap(TBB, FBB); |
| 8568 |
|
8568 |
|
| 8569 |
// Replace the old BB with the new BB. |
8569 |
// Replace the old BB with the new BB. |
| 8570 |
TBB->replacePhiUsesWith(&BB, TmpBB); |
8570 |
TBB->replacePhiUsesWith(&BB, TmpBB); |
| 8571 |
|
8571 |
|
| 8572 |
// Add another incoming edge from the new BB. |
8572 |
// Add another incoming edge from the new BB. |
| 8573 |
for (PHINode &PN : FBB->phis()) { |
8573 |
for (PHINode &PN : FBB->phis()) { |
| 8574 |
auto *Val = PN.getIncomingValueForBlock(&BB); |
8574 |
auto *Val = PN.getIncomingValueForBlock(&BB); |
| 8575 |
PN.addIncoming(Val, TmpBB); |
8575 |
PN.addIncoming(Val, TmpBB); |
| 8576 |
} |
8576 |
} |
| 8577 |
|
8577 |
|
| 8578 |
// Update the branch weights (from SelectionDAGBuilder:: |
8578 |
// Update the branch weights (from SelectionDAGBuilder:: |
| 8579 |
// FindMergedConditions). |
8579 |
// FindMergedConditions). |
| 8580 |
if (Opc == Instruction::Or) { |
8580 |
if (Opc == Instruction::Or) { |
| 8581 |
// Codegen X | Y as: |
8581 |
// Codegen X | Y as: |
| 8582 |
// BB1: |
8582 |
// BB1: |
| 8583 |
// jmp_if_X TBB |
8583 |
// jmp_if_X TBB |
| 8584 |
// jmp TmpBB |
8584 |
// jmp TmpBB |
| 8585 |
// TmpBB: |
8585 |
// TmpBB: |
| 8586 |
// jmp_if_Y TBB |
8586 |
// jmp_if_Y TBB |
| 8587 |
// jmp FBB |
8587 |
// jmp FBB |
| 8588 |
// |
8588 |
// |
| 8589 |
|
8589 |
|
| 8590 |
// We have flexibility in setting Prob for BB1 and Prob for NewBB. |
8590 |
// We have flexibility in setting Prob for BB1 and Prob for NewBB. |
| 8591 |
// The requirement is that |
8591 |
// The requirement is that |
| 8592 |
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) |
8592 |
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) |
| 8593 |
// = TrueProb for original BB. |
8593 |
// = TrueProb for original BB. |
| 8594 |
// Assuming the original weights are A and B, one choice is to set BB1's |
8594 |
// Assuming the original weights are A and B, one choice is to set BB1's |
| 8595 |
// weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice |
8595 |
// weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice |
| 8596 |
// assumes that |
8596 |
// assumes that |
| 8597 |
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. |
8597 |
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. |
| 8598 |
// Another choice is to assume TrueProb for BB1 equals to TrueProb for |
8598 |
// Another choice is to assume TrueProb for BB1 equals to TrueProb for |
| 8599 |
// TmpBB, but the math is more complicated. |
8599 |
// TmpBB, but the math is more complicated. |
| 8600 |
uint64_t TrueWeight, FalseWeight; |
8600 |
uint64_t TrueWeight, FalseWeight; |
| 8601 |
if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) { |
8601 |
if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) { |
| 8602 |
uint64_t NewTrueWeight = TrueWeight; |
8602 |
uint64_t NewTrueWeight = TrueWeight; |
| 8603 |
uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; |
8603 |
uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; |
| 8604 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
8604 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
| 8605 |
Br1->setMetadata(LLVMContext::MD_prof, |
8605 |
Br1->setMetadata(LLVMContext::MD_prof, |
| 8606 |
MDBuilder(Br1->getContext()) |
8606 |
MDBuilder(Br1->getContext()) |
| 8607 |
.createBranchWeights(TrueWeight, FalseWeight)); |
8607 |
.createBranchWeights(TrueWeight, FalseWeight)); |
| 8608 |
|
8608 |
|
| 8609 |
NewTrueWeight = TrueWeight; |
8609 |
NewTrueWeight = TrueWeight; |
| 8610 |
NewFalseWeight = 2 * FalseWeight; |
8610 |
NewFalseWeight = 2 * FalseWeight; |
| 8611 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
8611 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
| 8612 |
Br2->setMetadata(LLVMContext::MD_prof, |
8612 |
Br2->setMetadata(LLVMContext::MD_prof, |
| 8613 |
MDBuilder(Br2->getContext()) |
8613 |
MDBuilder(Br2->getContext()) |
| 8614 |
.createBranchWeights(TrueWeight, FalseWeight)); |
8614 |
.createBranchWeights(TrueWeight, FalseWeight)); |
| 8615 |
} |
8615 |
} |
| 8616 |
} else { |
8616 |
} else { |
| 8617 |
// Codegen X & Y as: |
8617 |
// Codegen X & Y as: |
| 8618 |
// BB1: |
8618 |
// BB1: |
| 8619 |
// jmp_if_X TmpBB |
8619 |
// jmp_if_X TmpBB |
| 8620 |
// jmp FBB |
8620 |
// jmp FBB |
| 8621 |
// TmpBB: |
8621 |
// TmpBB: |
| 8622 |
// jmp_if_Y TBB |
8622 |
// jmp_if_Y TBB |
| 8623 |
// jmp FBB |
8623 |
// jmp FBB |
| 8624 |
// |
8624 |
// |
| 8625 |
// This requires creation of TmpBB after CurBB. |
8625 |
// This requires creation of TmpBB after CurBB. |
| 8626 |
|
8626 |
|
| 8627 |
// We have flexibility in setting Prob for BB1 and Prob for TmpBB. |
8627 |
// We have flexibility in setting Prob for BB1 and Prob for TmpBB. |
| 8628 |
// The requirement is that |
8628 |
// The requirement is that |
| 8629 |
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) |
8629 |
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) |
| 8630 |
// = FalseProb for original BB. |
8630 |
// = FalseProb for original BB. |
| 8631 |
// Assuming the original weights are A and B, one choice is to set BB1's |
8631 |
// Assuming the original weights are A and B, one choice is to set BB1's |
| 8632 |
// weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice |
8632 |
// weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice |
| 8633 |
// assumes that |
8633 |
// assumes that |
| 8634 |
// FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. |
8634 |
// FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. |
| 8635 |
uint64_t TrueWeight, FalseWeight; |
8635 |
uint64_t TrueWeight, FalseWeight; |
| 8636 |
if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) { |
8636 |
if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) { |
| 8637 |
uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; |
8637 |
uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; |
| 8638 |
uint64_t NewFalseWeight = FalseWeight; |
8638 |
uint64_t NewFalseWeight = FalseWeight; |
| 8639 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
8639 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
| 8640 |
Br1->setMetadata(LLVMContext::MD_prof, |
8640 |
Br1->setMetadata(LLVMContext::MD_prof, |
| 8641 |
MDBuilder(Br1->getContext()) |
8641 |
MDBuilder(Br1->getContext()) |
| 8642 |
.createBranchWeights(TrueWeight, FalseWeight)); |
8642 |
.createBranchWeights(TrueWeight, FalseWeight)); |
| 8643 |
|
8643 |
|
| 8644 |
NewTrueWeight = 2 * TrueWeight; |
8644 |
NewTrueWeight = 2 * TrueWeight; |
| 8645 |
NewFalseWeight = FalseWeight; |
8645 |
NewFalseWeight = FalseWeight; |
| 8646 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
8646 |
scaleWeights(NewTrueWeight, NewFalseWeight); |
| 8647 |
Br2->setMetadata(LLVMContext::MD_prof, |
8647 |
Br2->setMetadata(LLVMContext::MD_prof, |
| 8648 |
MDBuilder(Br2->getContext()) |
8648 |
MDBuilder(Br2->getContext()) |
| 8649 |
.createBranchWeights(TrueWeight, FalseWeight)); |
8649 |
.createBranchWeights(TrueWeight, FalseWeight)); |
| 8650 |
} |
8650 |
} |
| 8651 |
} |
8651 |
} |
| 8652 |
|
8652 |
|
| 8653 |
ModifiedDT = ModifyDT::ModifyBBDT; |
8653 |
ModifiedDT = ModifyDT::ModifyBBDT; |
| 8654 |
MadeChange = true; |
8654 |
MadeChange = true; |
| 8655 |
|
8655 |
|
| 8656 |
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); |
8656 |
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); |
| 8657 |
TmpBB->dump()); |
8657 |
TmpBB->dump()); |
| 8658 |
} |
8658 |
} |
| 8659 |
return MadeChange; |
8659 |
return MadeChange; |
| 8660 |
} |
8660 |
} |
| 8661 |
|
8661 |
|